| Line Number |
../DebugInfoTest/example_mips_dbg.ll
BUT NOT
../DebugInfoTest/example_mips.ll
|
Line Number |
../DebugInfoTest/example_mips.ll
BUT NOT
../DebugInfoTest/example_mips_dbg.ll
|
| 1 |
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// |
1 |
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// |
| 2 |
// |
2 |
// |
| 3 |
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
3 |
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 |
// See https://llvm.org/LICENSE.txt for license information. |
4 |
// See https://llvm.org/LICENSE.txt for license information. |
| 5 |
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
5 |
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 |
// |
6 |
// |
| 7 |
//===----------------------------------------------------------------------===// |
7 |
//===----------------------------------------------------------------------===// |
| 8 |
// |
8 |
// |
| 9 |
// This file defines a DAG pattern matching instruction selector for X86, |
9 |
// This file defines a DAG pattern matching instruction selector for X86, |
| 10 |
// converting from a legalized dag to a X86 dag. |
10 |
// converting from a legalized dag to a X86 dag. |
| 11 |
// |
11 |
// |
| 12 |
//===----------------------------------------------------------------------===// |
12 |
//===----------------------------------------------------------------------===// |
| 13 |
|
13 |
|
| 14 |
#include "X86.h" |
14 |
#include "X86.h" |
| 15 |
#include "X86MachineFunctionInfo.h" |
15 |
#include "X86MachineFunctionInfo.h" |
| 16 |
#include "X86RegisterInfo.h" |
16 |
#include "X86RegisterInfo.h" |
| 17 |
#include "X86Subtarget.h" |
17 |
#include "X86Subtarget.h" |
| 18 |
#include "X86TargetMachine.h" |
18 |
#include "X86TargetMachine.h" |
| 19 |
#include "llvm/ADT/Statistic.h" |
19 |
#include "llvm/ADT/Statistic.h" |
| 20 |
#include "llvm/CodeGen/MachineModuleInfo.h" |
20 |
#include "llvm/CodeGen/MachineModuleInfo.h" |
| 21 |
#include "llvm/CodeGen/SelectionDAGISel.h" |
21 |
#include "llvm/CodeGen/SelectionDAGISel.h" |
| 22 |
#include "llvm/Config/llvm-config.h" |
22 |
#include "llvm/Config/llvm-config.h" |
| 23 |
#include "llvm/IR/ConstantRange.h" |
23 |
#include "llvm/IR/ConstantRange.h" |
| 24 |
#include "llvm/IR/Function.h" |
24 |
#include "llvm/IR/Function.h" |
| 25 |
#include "llvm/IR/Instructions.h" |
25 |
#include "llvm/IR/Instructions.h" |
| 26 |
#include "llvm/IR/Intrinsics.h" |
26 |
#include "llvm/IR/Intrinsics.h" |
| 27 |
#include "llvm/IR/IntrinsicsX86.h" |
27 |
#include "llvm/IR/IntrinsicsX86.h" |
| 28 |
#include "llvm/IR/Type.h" |
28 |
#include "llvm/IR/Type.h" |
| 29 |
#include "llvm/Support/Debug.h" |
29 |
#include "llvm/Support/Debug.h" |
| 30 |
#include "llvm/Support/ErrorHandling.h" |
30 |
#include "llvm/Support/ErrorHandling.h" |
| 31 |
#include "llvm/Support/KnownBits.h" |
31 |
#include "llvm/Support/KnownBits.h" |
| 32 |
#include "llvm/Support/MathExtras.h" |
32 |
#include "llvm/Support/MathExtras.h" |
| 33 |
#include |
33 |
#include |
| 34 |
|
34 |
|
| 35 |
using namespace llvm; |
35 |
using namespace llvm; |
| 36 |
|
36 |
|
| 37 |
#define DEBUG_TYPE "x86-isel" |
37 |
#define DEBUG_TYPE "x86-isel" |
| 38 |
#define PASS_NAME "X86 DAG->DAG Instruction Selection" |
38 |
#define PASS_NAME "X86 DAG->DAG Instruction Selection" |
| 39 |
|
39 |
|
| 40 |
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); |
40 |
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); |
| 41 |
|
41 |
|
| 42 |
static cl::opt AndImmShrink("x86-and-imm-shrink", cl::init(true), |
42 |
static cl::opt AndImmShrink("x86-and-imm-shrink", cl::init(true), |
| 43 |
cl::desc("Enable setting constant bits to reduce size of mask immediates"), |
43 |
cl::desc("Enable setting constant bits to reduce size of mask immediates"), |
| 44 |
cl::Hidden); |
44 |
cl::Hidden); |
| 45 |
|
45 |
|
| 46 |
static cl::opt EnablePromoteAnyextLoad( |
46 |
static cl::opt EnablePromoteAnyextLoad( |
| 47 |
"x86-promote-anyext-load", cl::init(true), |
47 |
"x86-promote-anyext-load", cl::init(true), |
| 48 |
cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden); |
48 |
cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden); |
| 49 |
|
49 |
|
| 50 |
extern cl::opt IndirectBranchTracking; |
50 |
extern cl::opt IndirectBranchTracking; |
| 51 |
|
51 |
|
| 52 |
//===----------------------------------------------------------------------===// |
52 |
//===----------------------------------------------------------------------===// |
| 53 |
// Pattern Matcher Implementation |
53 |
// Pattern Matcher Implementation |
| 54 |
//===----------------------------------------------------------------------===// |
54 |
//===----------------------------------------------------------------------===// |
| 55 |
|
55 |
|
| 56 |
namespace { |
56 |
namespace { |
| 57 |
/// This corresponds to X86AddressMode, but uses SDValue's instead of register |
57 |
/// This corresponds to X86AddressMode, but uses SDValue's instead of register |
| 58 |
/// numbers for the leaves of the matched tree. |
58 |
/// numbers for the leaves of the matched tree. |
| 59 |
struct X86ISelAddressMode { |
59 |
struct X86ISelAddressMode { |
| 60 |
enum { |
60 |
enum { |
| 61 |
RegBase, |
61 |
RegBase, |
| 62 |
FrameIndexBase |
62 |
FrameIndexBase |
| 63 |
} BaseType = RegBase; |
63 |
} BaseType = RegBase; |
| 64 |
|
64 |
|
| 65 |
// This is really a union, discriminated by BaseType! |
65 |
// This is really a union, discriminated by BaseType! |
| 66 |
SDValue Base_Reg; |
66 |
SDValue Base_Reg; |
| 67 |
int Base_FrameIndex = 0; |
67 |
int Base_FrameIndex = 0; |
| 68 |
|
68 |
|
| 69 |
unsigned Scale = 1; |
69 |
unsigned Scale = 1; |
| 70 |
SDValue IndexReg; |
70 |
SDValue IndexReg; |
| 71 |
int32_t Disp = 0; |
71 |
int32_t Disp = 0; |
| 72 |
SDValue Segment; |
72 |
SDValue Segment; |
| 73 |
const GlobalValue *GV = nullptr; |
73 |
const GlobalValue *GV = nullptr; |
| 74 |
const Constant *CP = nullptr; |
74 |
const Constant *CP = nullptr; |
| 75 |
const BlockAddress *BlockAddr = nullptr; |
75 |
const BlockAddress *BlockAddr = nullptr; |
| 76 |
const char *ES = nullptr; |
76 |
const char *ES = nullptr; |
| 77 |
MCSymbol *MCSym = nullptr; |
77 |
MCSymbol *MCSym = nullptr; |
| 78 |
int JT = -1; |
78 |
int JT = -1; |
| 79 |
Align Alignment; // CP alignment. |
79 |
Align Alignment; // CP alignment. |
| 80 |
unsigned char SymbolFlags = X86II::MO_NO_FLAG; // X86II::MO_* |
80 |
unsigned char SymbolFlags = X86II::MO_NO_FLAG; // X86II::MO_* |
| 81 |
bool NegateIndex = false; |
81 |
bool NegateIndex = false; |
| 82 |
|
82 |
|
| 83 |
X86ISelAddressMode() = default; |
83 |
X86ISelAddressMode() = default; |
| 84 |
|
84 |
|
| 85 |
bool hasSymbolicDisplacement() const { |
85 |
bool hasSymbolicDisplacement() const { |
| 86 |
return GV != nullptr || CP != nullptr || ES != nullptr || |
86 |
return GV != nullptr || CP != nullptr || ES != nullptr || |
| 87 |
MCSym != nullptr || JT != -1 || BlockAddr != nullptr; |
87 |
MCSym != nullptr || JT != -1 || BlockAddr != nullptr; |
| 88 |
} |
88 |
} |
| 89 |
|
89 |
|
| 90 |
bool hasBaseOrIndexReg() const { |
90 |
bool hasBaseOrIndexReg() const { |
| 91 |
return BaseType == FrameIndexBase || |
91 |
return BaseType == FrameIndexBase || |
| 92 |
IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; |
92 |
IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; |
| 93 |
} |
93 |
} |
| 94 |
|
94 |
|
| 95 |
/// Return true if this addressing mode is already RIP-relative. |
95 |
/// Return true if this addressing mode is already RIP-relative. |
| 96 |
bool isRIPRelative() const { |
96 |
bool isRIPRelative() const { |
| 97 |
if (BaseType != RegBase) return false; |
97 |
if (BaseType != RegBase) return false; |
| 98 |
if (RegisterSDNode *RegNode = |
98 |
if (RegisterSDNode *RegNode = |
| 99 |
dyn_cast_or_null(Base_Reg.getNode())) |
99 |
dyn_cast_or_null(Base_Reg.getNode())) |
| 100 |
return RegNode->getReg() == X86::RIP; |
100 |
return RegNode->getReg() == X86::RIP; |
| 101 |
return false; |
101 |
return false; |
| 102 |
} |
102 |
} |
| 103 |
|
103 |
|
| 104 |
void setBaseReg(SDValue Reg) { |
104 |
void setBaseReg(SDValue Reg) { |
| 105 |
BaseType = RegBase; |
105 |
BaseType = RegBase; |
| 106 |
Base_Reg = Reg; |
106 |
Base_Reg = Reg; |
| 107 |
} |
107 |
} |
| 108 |
|
108 |
|
| 109 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
109 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 110 |
void dump(SelectionDAG *DAG = nullptr) { |
110 |
void dump(SelectionDAG *DAG = nullptr) { |
| 111 |
dbgs() << "X86ISelAddressMode " << this << '\n'; |
111 |
dbgs() << "X86ISelAddressMode " << this << '\n'; |
| 112 |
dbgs() << "Base_Reg "; |
112 |
dbgs() << "Base_Reg "; |
| 113 |
if (Base_Reg.getNode()) |
113 |
if (Base_Reg.getNode()) |
| 114 |
Base_Reg.getNode()->dump(DAG); |
114 |
Base_Reg.getNode()->dump(DAG); |
| 115 |
else |
115 |
else |
| 116 |
dbgs() << "nul\n"; |
116 |
dbgs() << "nul\n"; |
| 117 |
if (BaseType == FrameIndexBase) |
117 |
if (BaseType == FrameIndexBase) |
| 118 |
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'; |
118 |
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'; |
| 119 |
dbgs() << " Scale " << Scale << '\n' |
119 |
dbgs() << " Scale " << Scale << '\n' |
| 120 |
<< "IndexReg "; |
120 |
<< "IndexReg "; |
| 121 |
if (NegateIndex) |
121 |
if (NegateIndex) |
| 122 |
dbgs() << "negate "; |
122 |
dbgs() << "negate "; |
| 123 |
if (IndexReg.getNode()) |
123 |
if (IndexReg.getNode()) |
| 124 |
IndexReg.getNode()->dump(DAG); |
124 |
IndexReg.getNode()->dump(DAG); |
| 125 |
else |
125 |
else |
| 126 |
dbgs() << "nul\n"; |
126 |
dbgs() << "nul\n"; |
| 127 |
dbgs() << " Disp " << Disp << '\n' |
127 |
dbgs() << " Disp " << Disp << '\n' |
| 128 |
<< "GV "; |
128 |
<< "GV "; |
| 129 |
if (GV) |
129 |
if (GV) |
| 130 |
GV->dump(); |
130 |
GV->dump(); |
| 131 |
else |
131 |
else |
| 132 |
dbgs() << "nul"; |
132 |
dbgs() << "nul"; |
| 133 |
dbgs() << " CP "; |
133 |
dbgs() << " CP "; |
| 134 |
if (CP) |
134 |
if (CP) |
| 135 |
CP->dump(); |
135 |
CP->dump(); |
| 136 |
else |
136 |
else |
| 137 |
dbgs() << "nul"; |
137 |
dbgs() << "nul"; |
| 138 |
dbgs() << '\n' |
138 |
dbgs() << '\n' |
| 139 |
<< "ES "; |
139 |
<< "ES "; |
| 140 |
if (ES) |
140 |
if (ES) |
| 141 |
dbgs() << ES; |
141 |
dbgs() << ES; |
| 142 |
else |
142 |
else |
| 143 |
dbgs() << "nul"; |
143 |
dbgs() << "nul"; |
| 144 |
dbgs() << " MCSym "; |
144 |
dbgs() << " MCSym "; |
| 145 |
if (MCSym) |
145 |
if (MCSym) |
| 146 |
dbgs() << MCSym; |
146 |
dbgs() << MCSym; |
| 147 |
else |
147 |
else |
| 148 |
dbgs() << "nul"; |
148 |
dbgs() << "nul"; |
| 149 |
dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n'; |
149 |
dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n'; |
| 150 |
} |
150 |
} |
| 151 |
#endif |
151 |
#endif |
| 152 |
}; |
152 |
}; |
| 153 |
} |
153 |
} |
| 154 |
|
154 |
|
| 155 |
namespace { |
155 |
namespace { |
| 156 |
//===--------------------------------------------------------------------===// |
156 |
//===--------------------------------------------------------------------===// |
| 157 |
/// ISel - X86-specific code to select X86 machine instructions for |
157 |
/// ISel - X86-specific code to select X86 machine instructions for |
| 158 |
/// SelectionDAG operations. |
158 |
/// SelectionDAG operations. |
| 159 |
/// |
159 |
/// |
| 160 |
class X86DAGToDAGISel final : public SelectionDAGISel { |
160 |
class X86DAGToDAGISel final : public SelectionDAGISel { |
| 161 |
/// Keep a pointer to the X86Subtarget around so that we can |
161 |
/// Keep a pointer to the X86Subtarget around so that we can |
| 162 |
/// make the right decision when generating code for different targets. |
162 |
/// make the right decision when generating code for different targets. |
| 163 |
const X86Subtarget *Subtarget; |
163 |
const X86Subtarget *Subtarget; |
| 164 |
|
164 |
|
| 165 |
/// If true, selector should try to optimize for minimum code size. |
165 |
/// If true, selector should try to optimize for minimum code size. |
| 166 |
bool OptForMinSize; |
166 |
bool OptForMinSize; |
| 167 |
|
167 |
|
| 168 |
/// Disable direct TLS access through segment registers. |
168 |
/// Disable direct TLS access through segment registers. |
| 169 |
bool IndirectTlsSegRefs; |
169 |
bool IndirectTlsSegRefs; |
| 170 |
|
170 |
|
| 171 |
public: |
171 |
public: |
| 172 |
static char ID; |
172 |
static char ID; |
| 173 |
|
173 |
|
| 174 |
X86DAGToDAGISel() = delete; |
174 |
X86DAGToDAGISel() = delete; |
| 175 |
|
175 |
|
| 176 |
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) |
176 |
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) |
| 177 |
: SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr), |
177 |
: SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr), |
| 178 |
OptForMinSize(false), IndirectTlsSegRefs(false) {} |
178 |
OptForMinSize(false), IndirectTlsSegRefs(false) {} |
| 179 |
|
179 |
|
| 180 |
bool runOnMachineFunction(MachineFunction &MF) override { |
180 |
bool runOnMachineFunction(MachineFunction &MF) override { |
| 181 |
// Reset the subtarget each time through. |
181 |
// Reset the subtarget each time through. |
| 182 |
Subtarget = &MF.getSubtarget(); |
182 |
Subtarget = &MF.getSubtarget(); |
| 183 |
IndirectTlsSegRefs = MF.getFunction().hasFnAttribute( |
183 |
IndirectTlsSegRefs = MF.getFunction().hasFnAttribute( |
| 184 |
"indirect-tls-seg-refs"); |
184 |
"indirect-tls-seg-refs"); |
| 185 |
|
185 |
|
| 186 |
// OptFor[Min]Size are used in pattern predicates that isel is matching. |
186 |
// OptFor[Min]Size are used in pattern predicates that isel is matching. |
| 187 |
OptForMinSize = MF.getFunction().hasMinSize(); |
187 |
OptForMinSize = MF.getFunction().hasMinSize(); |
| 188 |
assert((!OptForMinSize || MF.getFunction().hasOptSize()) && |
188 |
assert((!OptForMinSize || MF.getFunction().hasOptSize()) && |
| 189 |
"OptForMinSize implies OptForSize"); |
189 |
"OptForMinSize implies OptForSize"); |
| 190 |
|
190 |
|
| 191 |
SelectionDAGISel::runOnMachineFunction(MF); |
191 |
SelectionDAGISel::runOnMachineFunction(MF); |
| 192 |
return true; |
192 |
return true; |
| 193 |
} |
193 |
} |
| 194 |
|
194 |
|
| 195 |
void emitFunctionEntryCode() override; |
195 |
void emitFunctionEntryCode() override; |
| 196 |
|
196 |
|
| 197 |
bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; |
197 |
bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; |
| 198 |
|
198 |
|
| 199 |
void PreprocessISelDAG() override; |
199 |
void PreprocessISelDAG() override; |
| 200 |
void PostprocessISelDAG() override; |
200 |
void PostprocessISelDAG() override; |
| 201 |
|
201 |
|
| 202 |
// Include the pieces autogenerated from the target description. |
202 |
// Include the pieces autogenerated from the target description. |
| 203 |
#include "X86GenDAGISel.inc" |
203 |
#include "X86GenDAGISel.inc" |
| 204 |
|
204 |
|
| 205 |
private: |
205 |
private: |
| 206 |
void Select(SDNode *N) override; |
206 |
void Select(SDNode *N) override; |
| 207 |
|
207 |
|
| 208 |
bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); |
208 |
bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); |
| 209 |
bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, |
209 |
bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, |
| 210 |
bool AllowSegmentRegForX32 = false); |
210 |
bool AllowSegmentRegForX32 = false); |
| 211 |
bool matchWrapper(SDValue N, X86ISelAddressMode &AM); |
211 |
bool matchWrapper(SDValue N, X86ISelAddressMode &AM); |
| 212 |
bool matchAddress(SDValue N, X86ISelAddressMode &AM); |
212 |
bool matchAddress(SDValue N, X86ISelAddressMode &AM); |
| 213 |
bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM); |
213 |
bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM); |
| 214 |
bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth); |
214 |
bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth); |
| 215 |
bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, |
215 |
bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, |
| 216 |
unsigned Depth); |
216 |
unsigned Depth); |
| 217 |
bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM, |
217 |
bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM, |
| 218 |
unsigned Depth); |
218 |
unsigned Depth); |
| 219 |
bool matchAddressBase(SDValue N, X86ISelAddressMode &AM); |
219 |
bool matchAddressBase(SDValue N, X86ISelAddressMode &AM); |
| 220 |
bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base, |
220 |
bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base, |
| 221 |
SDValue &Scale, SDValue &Index, SDValue &Disp, |
221 |
SDValue &Scale, SDValue &Index, SDValue &Disp, |
| 222 |
SDValue &Segment); |
222 |
SDValue &Segment); |
| 223 |
bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp, |
223 |
bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp, |
| 224 |
SDValue ScaleOp, SDValue &Base, SDValue &Scale, |
224 |
SDValue ScaleOp, SDValue &Base, SDValue &Scale, |
| 225 |
SDValue &Index, SDValue &Disp, SDValue &Segment); |
225 |
SDValue &Index, SDValue &Disp, SDValue &Segment); |
| 226 |
bool selectMOV64Imm32(SDValue N, SDValue &Imm); |
226 |
bool selectMOV64Imm32(SDValue N, SDValue &Imm); |
| 227 |
bool selectLEAAddr(SDValue N, SDValue &Base, |
227 |
bool selectLEAAddr(SDValue N, SDValue &Base, |
| 228 |
SDValue &Scale, SDValue &Index, SDValue &Disp, |
228 |
SDValue &Scale, SDValue &Index, SDValue &Disp, |
| 229 |
SDValue &Segment); |
229 |
SDValue &Segment); |
| 230 |
bool selectLEA64_32Addr(SDValue N, SDValue &Base, |
230 |
bool selectLEA64_32Addr(SDValue N, SDValue &Base, |
| 231 |
SDValue &Scale, SDValue &Index, SDValue &Disp, |
231 |
SDValue &Scale, SDValue &Index, SDValue &Disp, |
| 232 |
SDValue &Segment); |
232 |
SDValue &Segment); |
| 233 |
bool selectTLSADDRAddr(SDValue N, SDValue &Base, |
233 |
bool selectTLSADDRAddr(SDValue N, SDValue &Base, |
| 234 |
SDValue &Scale, SDValue &Index, SDValue &Disp, |
234 |
SDValue &Scale, SDValue &Index, SDValue &Disp, |
| 235 |
SDValue &Segment); |
235 |
SDValue &Segment); |
| 236 |
bool selectRelocImm(SDValue N, SDValue &Op); |
236 |
bool selectRelocImm(SDValue N, SDValue &Op); |
| 237 |
|
237 |
|
| 238 |
bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, |
238 |
bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, |
| 239 |
SDValue &Base, SDValue &Scale, |
239 |
SDValue &Base, SDValue &Scale, |
| 240 |
SDValue &Index, SDValue &Disp, |
240 |
SDValue &Index, SDValue &Disp, |
| 241 |
SDValue &Segment); |
241 |
SDValue &Segment); |
| 242 |
|
242 |
|
| 243 |
// Convenience method where P is also root. |
243 |
// Convenience method where P is also root. |
| 244 |
bool tryFoldLoad(SDNode *P, SDValue N, |
244 |
bool tryFoldLoad(SDNode *P, SDValue N, |
| 245 |
SDValue &Base, SDValue &Scale, |
245 |
SDValue &Base, SDValue &Scale, |
| 246 |
SDValue &Index, SDValue &Disp, |
246 |
SDValue &Index, SDValue &Disp, |
| 247 |
SDValue &Segment) { |
247 |
SDValue &Segment) { |
| 248 |
return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment); |
248 |
return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment); |
| 249 |
} |
249 |
} |
| 250 |
|
250 |
|
| 251 |
bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, |
251 |
bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, |
| 252 |
SDValue &Base, SDValue &Scale, |
252 |
SDValue &Base, SDValue &Scale, |
| 253 |
SDValue &Index, SDValue &Disp, |
253 |
SDValue &Index, SDValue &Disp, |
| 254 |
SDValue &Segment); |
254 |
SDValue &Segment); |
| 255 |
|
255 |
|
| 256 |
bool isProfitableToFormMaskedOp(SDNode *N) const; |
256 |
bool isProfitableToFormMaskedOp(SDNode *N) const; |
| 257 |
|
257 |
|
| 258 |
/// Implement addressing mode selection for inline asm expressions. |
258 |
/// Implement addressing mode selection for inline asm expressions. |
| 259 |
bool SelectInlineAsmMemoryOperand(const SDValue &Op, |
259 |
bool SelectInlineAsmMemoryOperand(const SDValue &Op, |
| 260 |
unsigned ConstraintID, |
260 |
unsigned ConstraintID, |
| 261 |
std::vector &OutOps) override; |
261 |
std::vector &OutOps) override; |
| 262 |
|
262 |
|
| 263 |
void emitSpecialCodeForMain(); |
263 |
void emitSpecialCodeForMain(); |
| 264 |
|
264 |
|
| 265 |
inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL, |
265 |
inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL, |
| 266 |
MVT VT, SDValue &Base, SDValue &Scale, |
266 |
MVT VT, SDValue &Base, SDValue &Scale, |
| 267 |
SDValue &Index, SDValue &Disp, |
267 |
SDValue &Index, SDValue &Disp, |
| 268 |
SDValue &Segment) { |
268 |
SDValue &Segment) { |
| 269 |
if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) |
269 |
if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) |
| 270 |
Base = CurDAG->getTargetFrameIndex( |
270 |
Base = CurDAG->getTargetFrameIndex( |
| 271 |
AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout())); |
271 |
AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout())); |
| 272 |
else if (AM.Base_Reg.getNode()) |
272 |
else if (AM.Base_Reg.getNode()) |
| 273 |
Base = AM.Base_Reg; |
273 |
Base = AM.Base_Reg; |
| 274 |
else |
274 |
else |
| 275 |
Base = CurDAG->getRegister(0, VT); |
275 |
Base = CurDAG->getRegister(0, VT); |
| 276 |
|
276 |
|
| 277 |
Scale = getI8Imm(AM.Scale, DL); |
277 |
Scale = getI8Imm(AM.Scale, DL); |
| 278 |
|
278 |
|
| 279 |
// Negate the index if needed. |
279 |
// Negate the index if needed. |
| 280 |
if (AM.NegateIndex) { |
280 |
if (AM.NegateIndex) { |
| 281 |
unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r; |
281 |
unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r; |
| 282 |
SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32, |
282 |
SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32, |
| 283 |
AM.IndexReg), 0); |
283 |
AM.IndexReg), 0); |
| 284 |
AM.IndexReg = Neg; |
284 |
AM.IndexReg = Neg; |
| 285 |
} |
285 |
} |
| 286 |
|
286 |
|
| 287 |
if (AM.IndexReg.getNode()) |
287 |
if (AM.IndexReg.getNode()) |
| 288 |
Index = AM.IndexReg; |
288 |
Index = AM.IndexReg; |
| 289 |
else |
289 |
else |
| 290 |
Index = CurDAG->getRegister(0, VT); |
290 |
Index = CurDAG->getRegister(0, VT); |
| 291 |
|
291 |
|
| 292 |
// These are 32-bit even in 64-bit mode since RIP-relative offset |
292 |
// These are 32-bit even in 64-bit mode since RIP-relative offset |
| 293 |
// is 32-bit. |
293 |
// is 32-bit. |
| 294 |
if (AM.GV) |
294 |
if (AM.GV) |
| 295 |
Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), |
295 |
Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), |
| 296 |
MVT::i32, AM.Disp, |
296 |
MVT::i32, AM.Disp, |
| 297 |
AM.SymbolFlags); |
297 |
AM.SymbolFlags); |
| 298 |
else if (AM.CP) |
298 |
else if (AM.CP) |
| 299 |
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment, |
299 |
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment, |
| 300 |
AM.Disp, AM.SymbolFlags); |
300 |
AM.Disp, AM.SymbolFlags); |
| 301 |
else if (AM.ES) { |
301 |
else if (AM.ES) { |
| 302 |
assert(!AM.Disp && "Non-zero displacement is ignored with ES."); |
302 |
assert(!AM.Disp && "Non-zero displacement is ignored with ES."); |
| 303 |
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); |
303 |
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); |
| 304 |
} else if (AM.MCSym) { |
304 |
} else if (AM.MCSym) { |
| 305 |
assert(!AM.Disp && "Non-zero displacement is ignored with MCSym."); |
305 |
assert(!AM.Disp && "Non-zero displacement is ignored with MCSym."); |
| 306 |
assert(AM.SymbolFlags == 0 && "oo"); |
306 |
assert(AM.SymbolFlags == 0 && "oo"); |
| 307 |
Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32); |
307 |
Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32); |
| 308 |
} else if (AM.JT != -1) { |
308 |
} else if (AM.JT != -1) { |
| 309 |
assert(!AM.Disp && "Non-zero displacement is ignored with JT."); |
309 |
assert(!AM.Disp && "Non-zero displacement is ignored with JT."); |
| 310 |
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); |
310 |
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); |
| 311 |
} else if (AM.BlockAddr) |
311 |
} else if (AM.BlockAddr) |
| 312 |
Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, |
312 |
Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, |
| 313 |
AM.SymbolFlags); |
313 |
AM.SymbolFlags); |
| 314 |
else |
314 |
else |
| 315 |
Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32); |
315 |
Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32); |
| 316 |
|
316 |
|
| 317 |
if (AM.Segment.getNode()) |
317 |
if (AM.Segment.getNode()) |
| 318 |
Segment = AM.Segment; |
318 |
Segment = AM.Segment; |
| 319 |
else |
319 |
else |
| 320 |
Segment = CurDAG->getRegister(0, MVT::i16); |
320 |
Segment = CurDAG->getRegister(0, MVT::i16); |
| 321 |
} |
321 |
} |
| 322 |
|
322 |
|
| 323 |
// Utility function to determine whether we should avoid selecting |
323 |
// Utility function to determine whether we should avoid selecting |
| 324 |
// immediate forms of instructions for better code size or not. |
324 |
// immediate forms of instructions for better code size or not. |
| 325 |
// At a high level, we'd like to avoid such instructions when |
325 |
// At a high level, we'd like to avoid such instructions when |
| 326 |
// we have similar constants used within the same basic block |
326 |
// we have similar constants used within the same basic block |
| 327 |
// that can be kept in a register. |
327 |
// that can be kept in a register. |
| 328 |
// |
328 |
// |
| 329 |
bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { |
329 |
bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { |
| 330 |
uint32_t UseCount = 0; |
330 |
uint32_t UseCount = 0; |
| 331 |
|
331 |
|
| 332 |
// Do not want to hoist if we're not optimizing for size. |
332 |
// Do not want to hoist if we're not optimizing for size. |
| 333 |
// TODO: We'd like to remove this restriction. |
333 |
// TODO: We'd like to remove this restriction. |
| 334 |
// See the comment in X86InstrInfo.td for more info. |
334 |
// See the comment in X86InstrInfo.td for more info. |
| 335 |
if (!CurDAG->shouldOptForSize()) |
335 |
if (!CurDAG->shouldOptForSize()) |
| 336 |
return false; |
336 |
return false; |
| 337 |
|
337 |
|
| 338 |
// Walk all the users of the immediate. |
338 |
// Walk all the users of the immediate. |
| 339 |
for (const SDNode *User : N->uses()) { |
339 |
for (const SDNode *User : N->uses()) { |
| 340 |
if (UseCount >= 2) |
340 |
if (UseCount >= 2) |
| 341 |
break; |
341 |
break; |
| 342 |
|
342 |
|
| 343 |
// This user is already selected. Count it as a legitimate use and |
343 |
// This user is already selected. Count it as a legitimate use and |
| 344 |
// move on. |
344 |
// move on. |
| 345 |
if (User->isMachineOpcode()) { |
345 |
if (User->isMachineOpcode()) { |
| 346 |
UseCount++; |
346 |
UseCount++; |
| 347 |
continue; |
347 |
continue; |
| 348 |
} |
348 |
} |
| 349 |
|
349 |
|
| 350 |
// We want to count stores of immediates as real uses. |
350 |
// We want to count stores of immediates as real uses. |
| 351 |
if (User->getOpcode() == ISD::STORE && |
351 |
if (User->getOpcode() == ISD::STORE && |
| 352 |
User->getOperand(1).getNode() == N) { |
352 |
User->getOperand(1).getNode() == N) { |
| 353 |
UseCount++; |
353 |
UseCount++; |
| 354 |
continue; |
354 |
continue; |
| 355 |
} |
355 |
} |
| 356 |
|
356 |
|
| 357 |
// We don't currently match users that have > 2 operands (except |
357 |
// We don't currently match users that have > 2 operands (except |
| 358 |
// for stores, which are handled above) |
358 |
// for stores, which are handled above) |
| 359 |
// Those instruction won't match in ISEL, for now, and would |
359 |
// Those instruction won't match in ISEL, for now, and would |
| 360 |
// be counted incorrectly. |
360 |
// be counted incorrectly. |
| 361 |
// This may change in the future as we add additional instruction |
361 |
// This may change in the future as we add additional instruction |
| 362 |
// types. |
362 |
// types. |
| 363 |
if (User->getNumOperands() != 2) |
363 |
if (User->getNumOperands() != 2) |
| 364 |
continue; |
364 |
continue; |
| 365 |
|
365 |
|
| 366 |
// If this is a sign-extended 8-bit integer immediate used in an ALU |
366 |
// If this is a sign-extended 8-bit integer immediate used in an ALU |
| 367 |
// instruction, there is probably an opcode encoding to save space. |
367 |
// instruction, there is probably an opcode encoding to save space. |
| 368 |
auto *C = dyn_cast(N); |
368 |
auto *C = dyn_cast(N); |
| 369 |
if (C && isInt<8>(C->getSExtValue())) |
369 |
if (C && isInt<8>(C->getSExtValue())) |
| 370 |
continue; |
370 |
continue; |
| 371 |
|
371 |
|
| 372 |
// Immediates that are used for offsets as part of stack |
372 |
// Immediates that are used for offsets as part of stack |
| 373 |
// manipulation should be left alone. These are typically |
373 |
// manipulation should be left alone. These are typically |
| 374 |
// used to indicate SP offsets for argument passing and |
374 |
// used to indicate SP offsets for argument passing and |
| 375 |
// will get pulled into stores/pushes (implicitly). |
375 |
// will get pulled into stores/pushes (implicitly). |
| 376 |
if (User->getOpcode() == X86ISD::ADD || |
376 |
if (User->getOpcode() == X86ISD::ADD || |
| 377 |
User->getOpcode() == ISD::ADD || |
377 |
User->getOpcode() == ISD::ADD || |
| 378 |
User->getOpcode() == X86ISD::SUB || |
378 |
User->getOpcode() == X86ISD::SUB || |
| 379 |
User->getOpcode() == ISD::SUB) { |
379 |
User->getOpcode() == ISD::SUB) { |
| 380 |
|
380 |
|
| 381 |
// Find the other operand of the add/sub. |
381 |
// Find the other operand of the add/sub. |
| 382 |
SDValue OtherOp = User->getOperand(0); |
382 |
SDValue OtherOp = User->getOperand(0); |
| 383 |
if (OtherOp.getNode() == N) |
383 |
if (OtherOp.getNode() == N) |
| 384 |
OtherOp = User->getOperand(1); |
384 |
OtherOp = User->getOperand(1); |
| 385 |
|
385 |
|
| 386 |
// Don't count if the other operand is SP. |
386 |
// Don't count if the other operand is SP. |
| 387 |
RegisterSDNode *RegNode; |
387 |
RegisterSDNode *RegNode; |
| 388 |
if (OtherOp->getOpcode() == ISD::CopyFromReg && |
388 |
if (OtherOp->getOpcode() == ISD::CopyFromReg && |
| 389 |
(RegNode = dyn_cast_or_null( |
389 |
(RegNode = dyn_cast_or_null( |
| 390 |
OtherOp->getOperand(1).getNode()))) |
390 |
OtherOp->getOperand(1).getNode()))) |
| 391 |
if ((RegNode->getReg() == X86::ESP) || |
391 |
if ((RegNode->getReg() == X86::ESP) || |
| 392 |
(RegNode->getReg() == X86::RSP)) |
392 |
(RegNode->getReg() == X86::RSP)) |
| 393 |
continue; |
393 |
continue; |
| 394 |
} |
394 |
} |
| 395 |
|
395 |
|
| 396 |
// ... otherwise, count this and move on. |
396 |
// ... otherwise, count this and move on. |
| 397 |
UseCount++; |
397 |
UseCount++; |
| 398 |
} |
398 |
} |
| 399 |
|
399 |
|
| 400 |
// If we have more than 1 use, then recommend for hoisting. |
400 |
// If we have more than 1 use, then recommend for hoisting. |
| 401 |
return (UseCount > 1); |
401 |
return (UseCount > 1); |
| 402 |
} |
402 |
} |
| 403 |
|
403 |
|
| 404 |
/// Return a target constant with the specified value of type i8. |
404 |
/// Return a target constant with the specified value of type i8. |
| 405 |
inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) { |
405 |
inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) { |
| 406 |
return CurDAG->getTargetConstant(Imm, DL, MVT::i8); |
406 |
return CurDAG->getTargetConstant(Imm, DL, MVT::i8); |
| 407 |
} |
407 |
} |
| 408 |
|
408 |
|
| 409 |
/// Return a target constant with the specified value, of type i32. |
409 |
/// Return a target constant with the specified value, of type i32. |
| 410 |
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) { |
410 |
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) { |
| 411 |
return CurDAG->getTargetConstant(Imm, DL, MVT::i32); |
411 |
return CurDAG->getTargetConstant(Imm, DL, MVT::i32); |
| 412 |
} |
412 |
} |
| 413 |
|
413 |
|
| 414 |
/// Return a target constant with the specified value, of type i64. |
414 |
/// Return a target constant with the specified value, of type i64. |
| 415 |
inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) { |
415 |
inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) { |
| 416 |
return CurDAG->getTargetConstant(Imm, DL, MVT::i64); |
416 |
return CurDAG->getTargetConstant(Imm, DL, MVT::i64); |
| 417 |
} |
417 |
} |
| 418 |
|
418 |
|
| 419 |
SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth, |
419 |
SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth, |
| 420 |
const SDLoc &DL) { |
420 |
const SDLoc &DL) { |
| 421 |
assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); |
421 |
assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); |
| 422 |
uint64_t Index = N->getConstantOperandVal(1); |
422 |
uint64_t Index = N->getConstantOperandVal(1); |
| 423 |
MVT VecVT = N->getOperand(0).getSimpleValueType(); |
423 |
MVT VecVT = N->getOperand(0).getSimpleValueType(); |
| 424 |
return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); |
424 |
return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); |
| 425 |
} |
425 |
} |
| 426 |
|
426 |
|
| 427 |
SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth, |
427 |
SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth, |
| 428 |
const SDLoc &DL) { |
428 |
const SDLoc &DL) { |
| 429 |
assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); |
429 |
assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); |
| 430 |
uint64_t Index = N->getConstantOperandVal(2); |
430 |
uint64_t Index = N->getConstantOperandVal(2); |
| 431 |
MVT VecVT = N->getSimpleValueType(0); |
431 |
MVT VecVT = N->getSimpleValueType(0); |
| 432 |
return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); |
432 |
return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); |
| 433 |
} |
433 |
} |
| 434 |
|
434 |
|
| 435 |
SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth, |
435 |
SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth, |
| 436 |
const SDLoc &DL) { |
436 |
const SDLoc &DL) { |
| 437 |
assert(VecWidth == 128 && "Unexpected vector width"); |
437 |
assert(VecWidth == 128 && "Unexpected vector width"); |
| 438 |
uint64_t Index = N->getConstantOperandVal(2); |
438 |
uint64_t Index = N->getConstantOperandVal(2); |
| 439 |
MVT VecVT = N->getSimpleValueType(0); |
439 |
MVT VecVT = N->getSimpleValueType(0); |
| 440 |
uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth; |
440 |
uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth; |
| 441 |
assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index"); |
441 |
assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index"); |
| 442 |
// vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub) |
442 |
// vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub) |
| 443 |
// vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub) |
443 |
// vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub) |
| 444 |
return getI8Imm(InsertIdx ? 0x02 : 0x30, DL); |
444 |
return getI8Imm(InsertIdx ? 0x02 : 0x30, DL); |
| 445 |
} |
445 |
} |
| 446 |
|
446 |
|
| 447 |
SDValue getSBBZero(SDNode *N) { |
447 |
SDValue getSBBZero(SDNode *N) { |
| 448 |
SDLoc dl(N); |
448 |
SDLoc dl(N); |
| 449 |
MVT VT = N->getSimpleValueType(0); |
449 |
MVT VT = N->getSimpleValueType(0); |
| 450 |
|
450 |
|
| 451 |
// Create zero. |
451 |
// Create zero. |
| 452 |
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); |
452 |
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); |
| 453 |
SDValue Zero = SDValue( |
453 |
SDValue Zero = SDValue( |
| 454 |
CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0); |
454 |
CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0); |
| 455 |
if (VT == MVT::i64) { |
455 |
if (VT == MVT::i64) { |
| 456 |
Zero = SDValue( |
456 |
Zero = SDValue( |
| 457 |
CurDAG->getMachineNode( |
457 |
CurDAG->getMachineNode( |
| 458 |
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, |
458 |
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, |
| 459 |
CurDAG->getTargetConstant(0, dl, MVT::i64), Zero, |
459 |
CurDAG->getTargetConstant(0, dl, MVT::i64), Zero, |
| 460 |
CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)), |
460 |
CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)), |
| 461 |
0); |
461 |
0); |
| 462 |
} |
462 |
} |
| 463 |
|
463 |
|
| 464 |
// Copy flags to the EFLAGS register and glue it to next node. |
464 |
// Copy flags to the EFLAGS register and glue it to next node. |
| 465 |
unsigned Opcode = N->getOpcode(); |
465 |
unsigned Opcode = N->getOpcode(); |
| 466 |
assert((Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY) && |
466 |
assert((Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY) && |
| 467 |
"Unexpected opcode for SBB materialization"); |
467 |
"Unexpected opcode for SBB materialization"); |
| 468 |
unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1; |
468 |
unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1; |
| 469 |
SDValue EFLAGS = |
469 |
SDValue EFLAGS = |
| 470 |
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, |
470 |
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, |
| 471 |
N->getOperand(FlagOpIndex), SDValue()); |
471 |
N->getOperand(FlagOpIndex), SDValue()); |
| 472 |
|
472 |
|
| 473 |
// Create a 64-bit instruction if the result is 64-bits otherwise use the |
473 |
// Create a 64-bit instruction if the result is 64-bits otherwise use the |
| 474 |
// 32-bit version. |
474 |
// 32-bit version. |
| 475 |
unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr; |
475 |
unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr; |
| 476 |
MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; |
476 |
MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; |
| 477 |
VTs = CurDAG->getVTList(SBBVT, MVT::i32); |
477 |
VTs = CurDAG->getVTList(SBBVT, MVT::i32); |
| 478 |
return SDValue( |
478 |
return SDValue( |
| 479 |
CurDAG->getMachineNode(Opc, dl, VTs, |
479 |
CurDAG->getMachineNode(Opc, dl, VTs, |
| 480 |
{Zero, Zero, EFLAGS, EFLAGS.getValue(1)}), |
480 |
{Zero, Zero, EFLAGS, EFLAGS.getValue(1)}), |
| 481 |
0); |
481 |
0); |
| 482 |
} |
482 |
} |
| 483 |
|
483 |
|
| 484 |
// Helper to detect unneeded and instructions on shift amounts. Called |
484 |
// Helper to detect unneeded and instructions on shift amounts. Called |
| 485 |
// from PatFrags in tablegen. |
485 |
// from PatFrags in tablegen. |
| 486 |
bool isUnneededShiftMask(SDNode *N, unsigned Width) const { |
486 |
bool isUnneededShiftMask(SDNode *N, unsigned Width) const { |
| 487 |
assert(N->getOpcode() == ISD::AND && "Unexpected opcode"); |
487 |
assert(N->getOpcode() == ISD::AND && "Unexpected opcode"); |
| 488 |
const APInt &Val = cast(N->getOperand(1))->getAPIntValue(); |
488 |
const APInt &Val = cast(N->getOperand(1))->getAPIntValue(); |
| 489 |
|
489 |
|
| 490 |
if (Val.countr_one() >= Width) |
490 |
if (Val.countr_one() >= Width) |
| 491 |
return true; |
491 |
return true; |
| 492 |
|
492 |
|
| 493 |
APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero; |
493 |
APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero; |
| 494 |
return Mask.countr_one() >= Width; |
494 |
return Mask.countr_one() >= Width; |
| 495 |
} |
495 |
} |
| 496 |
|
496 |
|
| 497 |
/// Return an SDNode that returns the value of the global base register. |
497 |
/// Return an SDNode that returns the value of the global base register. |
| 498 |
/// Output instructions required to initialize the global base register, |
498 |
/// Output instructions required to initialize the global base register, |
| 499 |
/// if necessary. |
499 |
/// if necessary. |
| 500 |
SDNode *getGlobalBaseReg(); |
500 |
SDNode *getGlobalBaseReg(); |
| 501 |
|
501 |
|
| 502 |
/// Return a reference to the TargetMachine, casted to the target-specific |
502 |
/// Return a reference to the TargetMachine, casted to the target-specific |
| 503 |
/// type. |
503 |
/// type. |
| 504 |
const X86TargetMachine &getTargetMachine() const { |
504 |
const X86TargetMachine &getTargetMachine() const { |
| 505 |
return static_cast(TM); |
505 |
return static_cast(TM); |
| 506 |
} |
506 |
} |
| 507 |
|
507 |
|
| 508 |
/// Return a reference to the TargetInstrInfo, casted to the target-specific |
508 |
/// Return a reference to the TargetInstrInfo, casted to the target-specific |
| 509 |
/// type. |
509 |
/// type. |
| 510 |
const X86InstrInfo *getInstrInfo() const { |
510 |
const X86InstrInfo *getInstrInfo() const { |
| 511 |
return Subtarget->getInstrInfo(); |
511 |
return Subtarget->getInstrInfo(); |
| 512 |
} |
512 |
} |
| 513 |
|
513 |
|
| 514 |
/// Return a condition code of the given SDNode |
514 |
/// Return a condition code of the given SDNode |
| 515 |
X86::CondCode getCondFromNode(SDNode *N) const; |
515 |
X86::CondCode getCondFromNode(SDNode *N) const; |
| 516 |
|
516 |
|
| 517 |
/// Address-mode matching performs shift-of-and to and-of-shift |
517 |
/// Address-mode matching performs shift-of-and to and-of-shift |
| 518 |
/// reassociation in order to expose more scaled addressing |
518 |
/// reassociation in order to expose more scaled addressing |
| 519 |
/// opportunities. |
519 |
/// opportunities. |
| 520 |
bool ComplexPatternFuncMutatesDAG() const override { |
520 |
bool ComplexPatternFuncMutatesDAG() const override { |
| 521 |
return true; |
521 |
return true; |
| 522 |
} |
522 |
} |
| 523 |
|
523 |
|
| 524 |
bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const; |
524 |
bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const; |
| 525 |
|
525 |
|
| 526 |
// Indicates we should prefer to use a non-temporal load for this load. |
526 |
// Indicates we should prefer to use a non-temporal load for this load. |
| 527 |
bool useNonTemporalLoad(LoadSDNode *N) const { |
527 |
bool useNonTemporalLoad(LoadSDNode *N) const { |
| 528 |
if (!N->isNonTemporal()) |
528 |
if (!N->isNonTemporal()) |
| 529 |
return false; |
529 |
return false; |
| 530 |
|
530 |
|
| 531 |
unsigned StoreSize = N->getMemoryVT().getStoreSize(); |
531 |
unsigned StoreSize = N->getMemoryVT().getStoreSize(); |
| 532 |
|
532 |
|
| 533 |
if (N->getAlign().value() < StoreSize) |
533 |
if (N->getAlign().value() < StoreSize) |
| 534 |
return false; |
534 |
return false; |
| 535 |
|
535 |
|
| 536 |
switch (StoreSize) { |
536 |
switch (StoreSize) { |
| 537 |
default: llvm_unreachable("Unsupported store size"); |
537 |
default: llvm_unreachable("Unsupported store size"); |
| 538 |
case 4: |
538 |
case 4: |
| 539 |
case 8: |
539 |
case 8: |
| 540 |
return false; |
540 |
return false; |
| 541 |
case 16: |
541 |
case 16: |
| 542 |
return Subtarget->hasSSE41(); |
542 |
return Subtarget->hasSSE41(); |
| 543 |
case 32: |
543 |
case 32: |
| 544 |
return Subtarget->hasAVX2(); |
544 |
return Subtarget->hasAVX2(); |
| 545 |
case 64: |
545 |
case 64: |
| 546 |
return Subtarget->hasAVX512(); |
546 |
return Subtarget->hasAVX512(); |
| 547 |
} |
547 |
} |
| 548 |
} |
548 |
} |
| 549 |
|
549 |
|
| 550 |
bool foldLoadStoreIntoMemOperand(SDNode *Node); |
550 |
bool foldLoadStoreIntoMemOperand(SDNode *Node); |
| 551 |
MachineSDNode *matchBEXTRFromAndImm(SDNode *Node); |
551 |
MachineSDNode *matchBEXTRFromAndImm(SDNode *Node); |
| 552 |
bool matchBitExtract(SDNode *Node); |
552 |
bool matchBitExtract(SDNode *Node); |
| 553 |
bool shrinkAndImmediate(SDNode *N); |
553 |
bool shrinkAndImmediate(SDNode *N); |
| 554 |
bool isMaskZeroExtended(SDNode *N) const; |
554 |
bool isMaskZeroExtended(SDNode *N) const; |
| 555 |
bool tryShiftAmountMod(SDNode *N); |
555 |
bool tryShiftAmountMod(SDNode *N); |
| 556 |
bool tryShrinkShlLogicImm(SDNode *N); |
556 |
bool tryShrinkShlLogicImm(SDNode *N); |
| 557 |
bool tryVPTERNLOG(SDNode *N); |
557 |
bool tryVPTERNLOG(SDNode *N); |
| 558 |
bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentB, |
558 |
bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentB, |
| 559 |
SDNode *ParentC, SDValue A, SDValue B, SDValue C, |
559 |
SDNode *ParentC, SDValue A, SDValue B, SDValue C, |
| 560 |
uint8_t Imm); |
560 |
uint8_t Imm); |
| 561 |
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); |
561 |
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); |
| 562 |
bool tryMatchBitSelect(SDNode *N); |
562 |
bool tryMatchBitSelect(SDNode *N); |
| 563 |
|
563 |
|
| 564 |
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, |
564 |
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, |
| 565 |
const SDLoc &dl, MVT VT, SDNode *Node); |
565 |
const SDLoc &dl, MVT VT, SDNode *Node); |
| 566 |
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, |
566 |
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, |
| 567 |
const SDLoc &dl, MVT VT, SDNode *Node, |
567 |
const SDLoc &dl, MVT VT, SDNode *Node, |
| 568 |
SDValue &InGlue); |
568 |
SDValue &InGlue); |
| 569 |
|
569 |
|
| 570 |
bool tryOptimizeRem8Extend(SDNode *N); |
570 |
bool tryOptimizeRem8Extend(SDNode *N); |
| 571 |
|
571 |
|
| 572 |
bool onlyUsesZeroFlag(SDValue Flags) const; |
572 |
bool onlyUsesZeroFlag(SDValue Flags) const; |
| 573 |
bool hasNoSignFlagUses(SDValue Flags) const; |
573 |
bool hasNoSignFlagUses(SDValue Flags) const; |
| 574 |
bool hasNoCarryFlagUses(SDValue Flags) const; |
574 |
bool hasNoCarryFlagUses(SDValue Flags) const; |
| 575 |
}; |
575 |
}; |
| 576 |
} |
576 |
} |
| 577 |
|
577 |
|
| 578 |
char X86DAGToDAGISel::ID = 0; |
578 |
char X86DAGToDAGISel::ID = 0; |
| 579 |
|
579 |
|
| 580 |
INITIALIZE_PASS(X86DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) |
580 |
INITIALIZE_PASS(X86DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) |
| 581 |
|
581 |
|
| 582 |
// Returns true if this masked compare can be implemented legally with this |
582 |
// Returns true if this masked compare can be implemented legally with this |
| 583 |
// type. |
583 |
// type. |
| 584 |
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { |
584 |
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { |
| 585 |
unsigned Opcode = N->getOpcode(); |
585 |
unsigned Opcode = N->getOpcode(); |
| 586 |
if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM || |
586 |
if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM || |
| 587 |
Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC || |
587 |
Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC || |
| 588 |
Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) { |
588 |
Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) { |
| 589 |
// We can get 256-bit 8 element types here without VLX being enabled. When |
589 |
// We can get 256-bit 8 element types here without VLX being enabled. When |
| 590 |
// this happens we will use 512-bit operations and the mask will not be |
590 |
// this happens we will use 512-bit operations and the mask will not be |
| 591 |
// zero extended. |
591 |
// zero extended. |
| 592 |
EVT OpVT = N->getOperand(0).getValueType(); |
592 |
EVT OpVT = N->getOperand(0).getValueType(); |
| 593 |
// The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the |
593 |
// The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the |
| 594 |
// second operand. |
594 |
// second operand. |
| 595 |
if (Opcode == X86ISD::STRICT_CMPM) |
595 |
if (Opcode == X86ISD::STRICT_CMPM) |
| 596 |
OpVT = N->getOperand(1).getValueType(); |
596 |
OpVT = N->getOperand(1).getValueType(); |
| 597 |
if (OpVT.is256BitVector() || OpVT.is128BitVector()) |
597 |
if (OpVT.is256BitVector() || OpVT.is128BitVector()) |
| 598 |
return Subtarget->hasVLX(); |
598 |
return Subtarget->hasVLX(); |
| 599 |
|
599 |
|
| 600 |
return true; |
600 |
return true; |
| 601 |
} |
601 |
} |
| 602 |
// Scalar opcodes use 128 bit registers, but aren't subject to the VLX check. |
602 |
// Scalar opcodes use 128 bit registers, but aren't subject to the VLX check. |
| 603 |
if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM || |
603 |
if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM || |
| 604 |
Opcode == X86ISD::FSETCCM_SAE) |
604 |
Opcode == X86ISD::FSETCCM_SAE) |
| 605 |
return true; |
605 |
return true; |
| 606 |
|
606 |
|
| 607 |
return false; |
607 |
return false; |
| 608 |
} |
608 |
} |
| 609 |
|
609 |
|
| 610 |
// Returns true if we can assume the writer of the mask has zero extended it |
610 |
// Returns true if we can assume the writer of the mask has zero extended it |
| 611 |
// for us. |
611 |
// for us. |
| 612 |
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const { |
612 |
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const { |
| 613 |
// If this is an AND, check if we have a compare on either side. As long as |
613 |
// If this is an AND, check if we have a compare on either side. As long as |
| 614 |
// one side guarantees the mask is zero extended, the AND will preserve those |
614 |
// one side guarantees the mask is zero extended, the AND will preserve those |
| 615 |
// zeros. |
615 |
// zeros. |
| 616 |
if (N->getOpcode() == ISD::AND) |
616 |
if (N->getOpcode() == ISD::AND) |
| 617 |
return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) || |
617 |
return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) || |
| 618 |
isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget); |
618 |
isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget); |
| 619 |
|
619 |
|
| 620 |
return isLegalMaskCompare(N, Subtarget); |
620 |
return isLegalMaskCompare(N, Subtarget); |
| 621 |
} |
621 |
} |
| 622 |
|
622 |
|
| 623 |
bool |
623 |
bool |
| 624 |
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { |
624 |
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { |
| 625 |
if (OptLevel == CodeGenOpt::None) return false; |
625 |
if (OptLevel == CodeGenOpt::None) return false; |
| 626 |
|
626 |
|
| 627 |
if (!N.hasOneUse()) |
627 |
if (!N.hasOneUse()) |
| 628 |
return false; |
628 |
return false; |
| 629 |
|
629 |
|
| 630 |
if (N.getOpcode() != ISD::LOAD) |
630 |
if (N.getOpcode() != ISD::LOAD) |
| 631 |
return true; |
631 |
return true; |
| 632 |
|
632 |
|
| 633 |
// Don't fold non-temporal loads if we have an instruction for them. |
633 |
// Don't fold non-temporal loads if we have an instruction for them. |
| 634 |
if (useNonTemporalLoad(cast(N))) |
634 |
if (useNonTemporalLoad(cast(N))) |
| 635 |
return false; |
635 |
return false; |
| 636 |
|
636 |
|
| 637 |
// If N is a load, do additional profitability checks. |
637 |
// If N is a load, do additional profitability checks. |
| 638 |
if (U == Root) { |
638 |
if (U == Root) { |
| 639 |
switch (U->getOpcode()) { |
639 |
switch (U->getOpcode()) { |
| 640 |
default: break; |
640 |
default: break; |
| 641 |
case X86ISD::ADD: |
641 |
case X86ISD::ADD: |
| 642 |
case X86ISD::ADC: |
642 |
case X86ISD::ADC: |
| 643 |
case X86ISD::SUB: |
643 |
case X86ISD::SUB: |
| 644 |
case X86ISD::SBB: |
644 |
case X86ISD::SBB: |
| 645 |
case X86ISD::AND: |
645 |
case X86ISD::AND: |
| 646 |
case X86ISD::XOR: |
646 |
case X86ISD::XOR: |
| 647 |
case X86ISD::OR: |
647 |
case X86ISD::OR: |
| 648 |
case ISD::ADD: |
648 |
case ISD::ADD: |
| 649 |
case ISD::UADDO_CARRY: |
649 |
case ISD::UADDO_CARRY: |
| 650 |
case ISD::AND: |
650 |
case ISD::AND: |
| 651 |
case ISD::OR: |
651 |
case ISD::OR: |
| 652 |
case ISD::XOR: { |
652 |
case ISD::XOR: { |
| 653 |
SDValue Op1 = U->getOperand(1); |
653 |
SDValue Op1 = U->getOperand(1); |
| 654 |
|
654 |
|
| 655 |
// If the other operand is a 8-bit immediate we should fold the immediate |
655 |
// If the other operand is a 8-bit immediate we should fold the immediate |
| 656 |
// instead. This reduces code size. |
656 |
// instead. This reduces code size. |
| 657 |
// e.g. |
657 |
// e.g. |
| 658 |
// movl 4(%esp), %eax |
658 |
// movl 4(%esp), %eax |
| 659 |
// addl $4, %eax |
659 |
// addl $4, %eax |
| 660 |
// vs. |
660 |
// vs. |
| 661 |
// movl $4, %eax |
661 |
// movl $4, %eax |
| 662 |
// addl 4(%esp), %eax |
662 |
// addl 4(%esp), %eax |
| 663 |
// The former is 2 bytes shorter. In case where the increment is 1, then |
663 |
// The former is 2 bytes shorter. In case where the increment is 1, then |
| 664 |
// the saving can be 4 bytes (by using incl %eax). |
664 |
// the saving can be 4 bytes (by using incl %eax). |
| 665 |
if (auto *Imm = dyn_cast(Op1)) { |
665 |
if (auto *Imm = dyn_cast(Op1)) { |
| 666 |
if (Imm->getAPIntValue().isSignedIntN(8)) |
666 |
if (Imm->getAPIntValue().isSignedIntN(8)) |
| 667 |
return false; |
667 |
return false; |
| 668 |
|
668 |
|
| 669 |
// If this is a 64-bit AND with an immediate that fits in 32-bits, |
669 |
// If this is a 64-bit AND with an immediate that fits in 32-bits, |
| 670 |
// prefer using the smaller and over folding the load. This is needed to |
670 |
// prefer using the smaller and over folding the load. This is needed to |
| 671 |
// make sure immediates created by shrinkAndImmediate are always folded. |
671 |
// make sure immediates created by shrinkAndImmediate are always folded. |
| 672 |
// Ideally we would narrow the load during DAG combine and get the |
672 |
// Ideally we would narrow the load during DAG combine and get the |
| 673 |
// best of both worlds. |
673 |
// best of both worlds. |
| 674 |
if (U->getOpcode() == ISD::AND && |
674 |
if (U->getOpcode() == ISD::AND && |
| 675 |
Imm->getAPIntValue().getBitWidth() == 64 && |
675 |
Imm->getAPIntValue().getBitWidth() == 64 && |
| 676 |
Imm->getAPIntValue().isIntN(32)) |
676 |
Imm->getAPIntValue().isIntN(32)) |
| 677 |
return false; |
677 |
return false; |
| 678 |
|
678 |
|
| 679 |
// If this really a zext_inreg that can be represented with a movzx |
679 |
// If this really a zext_inreg that can be represented with a movzx |
| 680 |
// instruction, prefer that. |
680 |
// instruction, prefer that. |
| 681 |
// TODO: We could shrink the load and fold if it is non-volatile. |
681 |
// TODO: We could shrink the load and fold if it is non-volatile. |
| 682 |
if (U->getOpcode() == ISD::AND && |
682 |
if (U->getOpcode() == ISD::AND && |
| 683 |
(Imm->getAPIntValue() == UINT8_MAX || |
683 |
(Imm->getAPIntValue() == UINT8_MAX || |
| 684 |
Imm->getAPIntValue() == UINT16_MAX || |
684 |
Imm->getAPIntValue() == UINT16_MAX || |
| 685 |
Imm->getAPIntValue() == UINT32_MAX)) |
685 |
Imm->getAPIntValue() == UINT32_MAX)) |
| 686 |
return false; |
686 |
return false; |
| 687 |
|
687 |
|
| 688 |
// ADD/SUB with can negate the immediate and use the opposite operation |
688 |
// ADD/SUB with can negate the immediate and use the opposite operation |
| 689 |
// to fit 128 into a sign extended 8 bit immediate. |
689 |
// to fit 128 into a sign extended 8 bit immediate. |
| 690 |
if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) && |
690 |
if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) && |
| 691 |
(-Imm->getAPIntValue()).isSignedIntN(8)) |
691 |
(-Imm->getAPIntValue()).isSignedIntN(8)) |
| 692 |
return false; |
692 |
return false; |
| 693 |
|
693 |
|
| 694 |
if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) && |
694 |
if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) && |
| 695 |
(-Imm->getAPIntValue()).isSignedIntN(8) && |
695 |
(-Imm->getAPIntValue()).isSignedIntN(8) && |
| 696 |
hasNoCarryFlagUses(SDValue(U, 1))) |
696 |
hasNoCarryFlagUses(SDValue(U, 1))) |
| 697 |
return false; |
697 |
return false; |
| 698 |
} |
698 |
} |
| 699 |
|
699 |
|
| 700 |
// If the other operand is a TLS address, we should fold it instead. |
700 |
// If the other operand is a TLS address, we should fold it instead. |
| 701 |
// This produces |
701 |
// This produces |
| 702 |
// movl %gs:0, %eax |
702 |
// movl %gs:0, %eax |
| 703 |
// leal i@NTPOFF(%eax), %eax |
703 |
// leal i@NTPOFF(%eax), %eax |
| 704 |
// instead of |
704 |
// instead of |
| 705 |
// movl $i@NTPOFF, %eax |
705 |
// movl $i@NTPOFF, %eax |
| 706 |
// addl %gs:0, %eax |
706 |
// addl %gs:0, %eax |
| 707 |
// if the block also has an access to a second TLS address this will save |
707 |
// if the block also has an access to a second TLS address this will save |
| 708 |
// a load. |
708 |
// a load. |
| 709 |
// FIXME: This is probably also true for non-TLS addresses. |
709 |
// FIXME: This is probably also true for non-TLS addresses. |
| 710 |
if (Op1.getOpcode() == X86ISD::Wrapper) { |
710 |
if (Op1.getOpcode() == X86ISD::Wrapper) { |
| 711 |
SDValue Val = Op1.getOperand(0); |
711 |
SDValue Val = Op1.getOperand(0); |
| 712 |
if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) |
712 |
if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) |
| 713 |
return false; |
713 |
return false; |
| 714 |
} |
714 |
} |
| 715 |
|
715 |
|
| 716 |
// Don't fold load if this matches the BTS/BTR/BTC patterns. |
716 |
// Don't fold load if this matches the BTS/BTR/BTC patterns. |
| 717 |
// BTS: (or X, (shl 1, n)) |
717 |
// BTS: (or X, (shl 1, n)) |
| 718 |
// BTR: (and X, (rotl -2, n)) |
718 |
// BTR: (and X, (rotl -2, n)) |
| 719 |
// BTC: (xor X, (shl 1, n)) |
719 |
// BTC: (xor X, (shl 1, n)) |
| 720 |
if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) { |
720 |
if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) { |
| 721 |
if (U->getOperand(0).getOpcode() == ISD::SHL && |
721 |
if (U->getOperand(0).getOpcode() == ISD::SHL && |
| 722 |
isOneConstant(U->getOperand(0).getOperand(0))) |
722 |
isOneConstant(U->getOperand(0).getOperand(0))) |
| 723 |
return false; |
723 |
return false; |
| 724 |
|
724 |
|
| 725 |
if (U->getOperand(1).getOpcode() == ISD::SHL && |
725 |
if (U->getOperand(1).getOpcode() == ISD::SHL && |
| 726 |
isOneConstant(U->getOperand(1).getOperand(0))) |
726 |
isOneConstant(U->getOperand(1).getOperand(0))) |
| 727 |
return false; |
727 |
return false; |
| 728 |
} |
728 |
} |
| 729 |
if (U->getOpcode() == ISD::AND) { |
729 |
if (U->getOpcode() == ISD::AND) { |
| 730 |
SDValue U0 = U->getOperand(0); |
730 |
SDValue U0 = U->getOperand(0); |
| 731 |
SDValue U1 = U->getOperand(1); |
731 |
SDValue U1 = U->getOperand(1); |
| 732 |
if (U0.getOpcode() == ISD::ROTL) { |
732 |
if (U0.getOpcode() == ISD::ROTL) { |
| 733 |
auto *C = dyn_cast(U0.getOperand(0)); |
733 |
auto *C = dyn_cast(U0.getOperand(0)); |
| 734 |
if (C && C->getSExtValue() == -2) |
734 |
if (C && C->getSExtValue() == -2) |
| 735 |
return false; |
735 |
return false; |
| 736 |
} |
736 |
} |
| 737 |
|
737 |
|
| 738 |
if (U1.getOpcode() == ISD::ROTL) { |
738 |
if (U1.getOpcode() == ISD::ROTL) { |
| 739 |
auto *C = dyn_cast(U1.getOperand(0)); |
739 |
auto *C = dyn_cast(U1.getOperand(0)); |
| 740 |
if (C && C->getSExtValue() == -2) |
740 |
if (C && C->getSExtValue() == -2) |
| 741 |
return false; |
741 |
return false; |
| 742 |
} |
742 |
} |
| 743 |
} |
743 |
} |
| 744 |
|
744 |
|
| 745 |
break; |
745 |
break; |
| 746 |
} |
746 |
} |
| 747 |
case ISD::SHL: |
747 |
case ISD::SHL: |
| 748 |
case ISD::SRA: |
748 |
case ISD::SRA: |
| 749 |
case ISD::SRL: |
749 |
case ISD::SRL: |
| 750 |
// Don't fold a load into a shift by immediate. The BMI2 instructions |
750 |
// Don't fold a load into a shift by immediate. The BMI2 instructions |
| 751 |
// support folding a load, but not an immediate. The legacy instructions |
751 |
// support folding a load, but not an immediate. The legacy instructions |
| 752 |
// support folding an immediate, but can't fold a load. Folding an |
752 |
// support folding an immediate, but can't fold a load. Folding an |
| 753 |
// immediate is preferable to folding a load. |
753 |
// immediate is preferable to folding a load. |
| 754 |
if (isa(U->getOperand(1))) |
754 |
if (isa(U->getOperand(1))) |
| 755 |
return false; |
755 |
return false; |
| 756 |
|
756 |
|
| 757 |
break; |
757 |
break; |
| 758 |
} |
758 |
} |
| 759 |
} |
759 |
} |
| 760 |
|
760 |
|
| 761 |
// Prevent folding a load if this can implemented with an insert_subreg or |
761 |
// Prevent folding a load if this can implemented with an insert_subreg or |
| 762 |
// a move that implicitly zeroes. |
762 |
// a move that implicitly zeroes. |
| 763 |
if (Root->getOpcode() == ISD::INSERT_SUBVECTOR && |
763 |
if (Root->getOpcode() == ISD::INSERT_SUBVECTOR && |
| 764 |
isNullConstant(Root->getOperand(2)) && |
764 |
isNullConstant(Root->getOperand(2)) && |
| 765 |
(Root->getOperand(0).isUndef() || |
765 |
(Root->getOperand(0).isUndef() || |
| 766 |
ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode()))) |
766 |
ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode()))) |
| 767 |
return false; |
767 |
return false; |
| 768 |
|
768 |
|
| 769 |
return true; |
769 |
return true; |
| 770 |
} |
770 |
} |
| 771 |
|
771 |
|
| 772 |
// Indicates it is profitable to form an AVX512 masked operation. Returning |
772 |
// Indicates it is profitable to form an AVX512 masked operation. Returning |
| 773 |
// false will favor a masked register-register masked move or vblendm and the |
773 |
// false will favor a masked register-register masked move or vblendm and the |
| 774 |
// operation will be selected separately. |
774 |
// operation will be selected separately. |
| 775 |
bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const { |
775 |
bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const { |
| 776 |
assert( |
776 |
assert( |
| 777 |
(N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && |
777 |
(N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && |
| 778 |
"Unexpected opcode!"); |
778 |
"Unexpected opcode!"); |
| 779 |
|
779 |
|
| 780 |
// If the operation has additional users, the operation will be duplicated. |
780 |
// If the operation has additional users, the operation will be duplicated. |
| 781 |
// Check the use count to prevent that. |
781 |
// Check the use count to prevent that. |
| 782 |
// FIXME: Are there cheap opcodes we might want to duplicate? |
782 |
// FIXME: Are there cheap opcodes we might want to duplicate? |
| 783 |
return N->getOperand(1).hasOneUse(); |
783 |
return N->getOperand(1).hasOneUse(); |
| 784 |
} |
784 |
} |
| 785 |
|
785 |
|
| 786 |
/// Replace the original chain operand of the call with |
786 |
/// Replace the original chain operand of the call with |
| 787 |
/// load's chain operand and move load below the call's chain operand. |
787 |
/// load's chain operand and move load below the call's chain operand. |
| 788 |
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, |
788 |
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, |
| 789 |
SDValue Call, SDValue OrigChain) { |
789 |
SDValue Call, SDValue OrigChain) { |
| 790 |
SmallVector Ops; |
790 |
SmallVector Ops; |
| 791 |
SDValue Chain = OrigChain.getOperand(0); |
791 |
SDValue Chain = OrigChain.getOperand(0); |
| 792 |
if (Chain.getNode() == Load.getNode()) |
792 |
if (Chain.getNode() == Load.getNode()) |
| 793 |
Ops.push_back(Load.getOperand(0)); |
793 |
Ops.push_back(Load.getOperand(0)); |
| 794 |
else { |
794 |
else { |
| 795 |
assert(Chain.getOpcode() == ISD::TokenFactor && |
795 |
assert(Chain.getOpcode() == ISD::TokenFactor && |
| 796 |
"Unexpected chain operand"); |
796 |
"Unexpected chain operand"); |
| 797 |
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) |
797 |
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) |
| 798 |
if (Chain.getOperand(i).getNode() == Load.getNode()) |
798 |
if (Chain.getOperand(i).getNode() == Load.getNode()) |
| 799 |
Ops.push_back(Load.getOperand(0)); |
799 |
Ops.push_back(Load.getOperand(0)); |
| 800 |
else |
800 |
else |
| 801 |
Ops.push_back(Chain.getOperand(i)); |
801 |
Ops.push_back(Chain.getOperand(i)); |
| 802 |
SDValue NewChain = |
802 |
SDValue NewChain = |
| 803 |
CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops); |
803 |
CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops); |
| 804 |
Ops.clear(); |
804 |
Ops.clear(); |
| 805 |
Ops.push_back(NewChain); |
805 |
Ops.push_back(NewChain); |
| 806 |
} |
806 |
} |
| 807 |
Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end()); |
807 |
Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end()); |
| 808 |
CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops); |
808 |
CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops); |
| 809 |
CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), |
809 |
CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), |
| 810 |
Load.getOperand(1), Load.getOperand(2)); |
810 |
Load.getOperand(1), Load.getOperand(2)); |
| 811 |
|
811 |
|
| 812 |
Ops.clear(); |
812 |
Ops.clear(); |
| 813 |
Ops.push_back(SDValue(Load.getNode(), 1)); |
813 |
Ops.push_back(SDValue(Load.getNode(), 1)); |
| 814 |
Ops.append(Call->op_begin() + 1, Call->op_end()); |
814 |
Ops.append(Call->op_begin() + 1, Call->op_end()); |
| 815 |
CurDAG->UpdateNodeOperands(Call.getNode(), Ops); |
815 |
CurDAG->UpdateNodeOperands(Call.getNode(), Ops); |
| 816 |
} |
816 |
} |
| 817 |
|
817 |
|
| 818 |
/// Return true if call address is a load and it can be |
818 |
/// Return true if call address is a load and it can be |
| 819 |
/// moved below CALLSEQ_START and the chains leading up to the call. |
819 |
/// moved below CALLSEQ_START and the chains leading up to the call. |
| 820 |
/// Return the CALLSEQ_START by reference as a second output. |
820 |
/// Return the CALLSEQ_START by reference as a second output. |
| 821 |
/// In the case of a tail call, there isn't a callseq node between the call |
821 |
/// In the case of a tail call, there isn't a callseq node between the call |
| 822 |
/// chain and the load. |
822 |
/// chain and the load. |
| 823 |
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { |
823 |
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { |
| 824 |
// The transformation is somewhat dangerous if the call's chain was glued to |
824 |
// The transformation is somewhat dangerous if the call's chain was glued to |
| 825 |
// the call. After MoveBelowOrigChain the load is moved between the call and |
825 |
// the call. After MoveBelowOrigChain the load is moved between the call and |
| 826 |
// the chain, this can create a cycle if the load is not folded. So it is |
826 |
// the chain, this can create a cycle if the load is not folded. So it is |
| 827 |
// *really* important that we are sure the load will be folded. |
827 |
// *really* important that we are sure the load will be folded. |
| 828 |
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) |
828 |
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) |
| 829 |
return false; |
829 |
return false; |
| 830 |
auto *LD = dyn_cast(Callee.getNode()); |
830 |
auto *LD = dyn_cast(Callee.getNode()); |
| 831 |
if (!LD || |
831 |
if (!LD || |
| 832 |
!LD->isSimple() || |
832 |
!LD->isSimple() || |
| 833 |
LD->getAddressingMode() != ISD::UNINDEXED || |
833 |
LD->getAddressingMode() != ISD::UNINDEXED || |
| 834 |
LD->getExtensionType() != ISD::NON_EXTLOAD) |
834 |
LD->getExtensionType() != ISD::NON_EXTLOAD) |
| 835 |
return false; |
835 |
return false; |
| 836 |
|
836 |
|
| 837 |
// Now let's find the callseq_start. |
837 |
// Now let's find the callseq_start. |
| 838 |
while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) { |
838 |
while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) { |
| 839 |
if (!Chain.hasOneUse()) |
839 |
if (!Chain.hasOneUse()) |
| 840 |
return false; |
840 |
return false; |
| 841 |
Chain = Chain.getOperand(0); |
841 |
Chain = Chain.getOperand(0); |
| 842 |
} |
842 |
} |
| 843 |
|
843 |
|
| 844 |
if (!Chain.getNumOperands()) |
844 |
if (!Chain.getNumOperands()) |
| 845 |
return false; |
845 |
return false; |
| 846 |
// Since we are not checking for AA here, conservatively abort if the chain |
846 |
// Since we are not checking for AA here, conservatively abort if the chain |
| 847 |
// writes to memory. It's not safe to move the callee (a load) across a store. |
847 |
// writes to memory. It's not safe to move the callee (a load) across a store. |
| 848 |
if (isa(Chain.getNode()) && |
848 |
if (isa(Chain.getNode()) && |
| 849 |
cast(Chain.getNode())->writeMem()) |
849 |
cast(Chain.getNode())->writeMem()) |
| 850 |
return false; |
850 |
return false; |
| 851 |
if (Chain.getOperand(0).getNode() == Callee.getNode()) |
851 |
if (Chain.getOperand(0).getNode() == Callee.getNode()) |
| 852 |
return true; |
852 |
return true; |
| 853 |
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && |
853 |
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && |
| 854 |
Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) && |
854 |
Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) && |
| 855 |
Callee.getValue(1).hasOneUse()) |
855 |
Callee.getValue(1).hasOneUse()) |
| 856 |
return true; |
856 |
return true; |
| 857 |
return false; |
857 |
return false; |
| 858 |
} |
858 |
} |
| 859 |
|
859 |
|
| 860 |
static bool isEndbrImm64(uint64_t Imm) { |
860 |
static bool isEndbrImm64(uint64_t Imm) { |
| 861 |
// There may be some other prefix bytes between 0xF3 and 0x0F1EFA. |
861 |
// There may be some other prefix bytes between 0xF3 and 0x0F1EFA. |
| 862 |
// i.g: 0xF3660F1EFA, 0xF3670F1EFA |
862 |
// i.g: 0xF3660F1EFA, 0xF3670F1EFA |
| 863 |
if ((Imm & 0x00FFFFFF) != 0x0F1EFA) |
863 |
if ((Imm & 0x00FFFFFF) != 0x0F1EFA) |
| 864 |
return false; |
864 |
return false; |
| 865 |
|
865 |
|
| 866 |
uint8_t OptionalPrefixBytes [] = {0x26, 0x2e, 0x36, 0x3e, 0x64, |
866 |
uint8_t OptionalPrefixBytes [] = {0x26, 0x2e, 0x36, 0x3e, 0x64, |
| 867 |
0x65, 0x66, 0x67, 0xf0, 0xf2}; |
867 |
0x65, 0x66, 0x67, 0xf0, 0xf2}; |
| 868 |
int i = 24; // 24bit 0x0F1EFA has matched |
868 |
int i = 24; // 24bit 0x0F1EFA has matched |
| 869 |
while (i < 64) { |
869 |
while (i < 64) { |
| 870 |
uint8_t Byte = (Imm >> i) & 0xFF; |
870 |
uint8_t Byte = (Imm >> i) & 0xFF; |
| 871 |
if (Byte == 0xF3) |
871 |
if (Byte == 0xF3) |
| 872 |
return true; |
872 |
return true; |
| 873 |
if (!llvm::is_contained(OptionalPrefixBytes, Byte)) |
873 |
if (!llvm::is_contained(OptionalPrefixBytes, Byte)) |
| 874 |
return false; |
874 |
return false; |
| 875 |
i += 8; |
875 |
i += 8; |
| 876 |
} |
876 |
} |
| 877 |
|
877 |
|
| 878 |
return false; |
878 |
return false; |
| 879 |
} |
879 |
} |
| 880 |
|
880 |
|
| 881 |
void X86DAGToDAGISel::PreprocessISelDAG() { |
881 |
void X86DAGToDAGISel::PreprocessISelDAG() { |
| 882 |
bool MadeChange = false; |
882 |
bool MadeChange = false; |
| 883 |
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), |
883 |
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), |
| 884 |
E = CurDAG->allnodes_end(); I != E; ) { |
884 |
E = CurDAG->allnodes_end(); I != E; ) { |
| 885 |
SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. |
885 |
SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. |
| 886 |
|
886 |
|
| 887 |
// This is for CET enhancement. |
887 |
// This is for CET enhancement. |
| 888 |
// |
888 |
// |
| 889 |
// ENDBR32 and ENDBR64 have specific opcodes: |
889 |
// ENDBR32 and ENDBR64 have specific opcodes: |
| 890 |
// ENDBR32: F3 0F 1E FB |
890 |
// ENDBR32: F3 0F 1E FB |
| 891 |
// ENDBR64: F3 0F 1E FA |
891 |
// ENDBR64: F3 0F 1E FA |
| 892 |
// And we want that attackers won’t find unintended ENDBR32/64 |
892 |
// And we want that attackers won’t find unintended ENDBR32/64 |
| 893 |
// opcode matches in the binary |
893 |
// opcode matches in the binary |
| 894 |
// Here’s an example: |
894 |
// Here’s an example: |
| 895 |
// If the compiler had to generate asm for the following code: |
895 |
// If the compiler had to generate asm for the following code: |
| 896 |
// a = 0xF30F1EFA |
896 |
// a = 0xF30F1EFA |
| 897 |
// it could, for example, generate: |
897 |
// it could, for example, generate: |
| 898 |
// mov 0xF30F1EFA, dword ptr[a] |
898 |
// mov 0xF30F1EFA, dword ptr[a] |
| 899 |
// In such a case, the binary would include a gadget that starts |
899 |
// In such a case, the binary would include a gadget that starts |
| 900 |
// with a fake ENDBR64 opcode. Therefore, we split such generation |
900 |
// with a fake ENDBR64 opcode. Therefore, we split such generation |
| 901 |
// into multiple operations, let it not shows in the binary |
901 |
// into multiple operations, let it not shows in the binary |
| 902 |
if (N->getOpcode() == ISD::Constant) { |
902 |
if (N->getOpcode() == ISD::Constant) { |
| 903 |
MVT VT = N->getSimpleValueType(0); |
903 |
MVT VT = N->getSimpleValueType(0); |
| 904 |
int64_t Imm = cast(N)->getSExtValue(); |
904 |
int64_t Imm = cast(N)->getSExtValue(); |
| 905 |
int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB; |
905 |
int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB; |
| 906 |
if (Imm == EndbrImm || isEndbrImm64(Imm)) { |
906 |
if (Imm == EndbrImm || isEndbrImm64(Imm)) { |
| 907 |
// Check that the cf-protection-branch is enabled. |
907 |
// Check that the cf-protection-branch is enabled. |
| 908 |
Metadata *CFProtectionBranch = |
908 |
Metadata *CFProtectionBranch = |
| 909 |
MF->getMMI().getModule()->getModuleFlag("cf-protection-branch"); |
909 |
MF->getMMI().getModule()->getModuleFlag("cf-protection-branch"); |
| 910 |
if (CFProtectionBranch || IndirectBranchTracking) { |
910 |
if (CFProtectionBranch || IndirectBranchTracking) { |
| 911 |
SDLoc dl(N); |
911 |
SDLoc dl(N); |
| 912 |
SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true); |
912 |
SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true); |
| 913 |
Complement = CurDAG->getNOT(dl, Complement, VT); |
913 |
Complement = CurDAG->getNOT(dl, Complement, VT); |
| 914 |
--I; |
914 |
--I; |
| 915 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement); |
915 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement); |
| 916 |
++I; |
916 |
++I; |
| 917 |
MadeChange = true; |
917 |
MadeChange = true; |
| 918 |
continue; |
918 |
continue; |
| 919 |
} |
919 |
} |
| 920 |
} |
920 |
} |
| 921 |
} |
921 |
} |
| 922 |
|
922 |
|
| 923 |
// If this is a target specific AND node with no flag usages, turn it back |
923 |
// If this is a target specific AND node with no flag usages, turn it back |
| 924 |
// into ISD::AND to enable test instruction matching. |
924 |
// into ISD::AND to enable test instruction matching. |
| 925 |
if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) { |
925 |
if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) { |
| 926 |
SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0), |
926 |
SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0), |
| 927 |
N->getOperand(0), N->getOperand(1)); |
927 |
N->getOperand(0), N->getOperand(1)); |
| 928 |
--I; |
928 |
--I; |
| 929 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
929 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
| 930 |
++I; |
930 |
++I; |
| 931 |
MadeChange = true; |
931 |
MadeChange = true; |
| 932 |
continue; |
932 |
continue; |
| 933 |
} |
933 |
} |
| 934 |
|
934 |
|
| 935 |
// Convert vector increment or decrement to sub/add with an all-ones |
935 |
// Convert vector increment or decrement to sub/add with an all-ones |
| 936 |
// constant: |
936 |
// constant: |
| 937 |
// add X, <1, 1...> --> sub X, <-1, -1...> |
937 |
// add X, <1, 1...> --> sub X, <-1, -1...> |
| 938 |
// sub X, <1, 1...> --> add X, <-1, -1...> |
938 |
// sub X, <1, 1...> --> add X, <-1, -1...> |
| 939 |
// The all-ones vector constant can be materialized using a pcmpeq |
939 |
// The all-ones vector constant can be materialized using a pcmpeq |
| 940 |
// instruction that is commonly recognized as an idiom (has no register |
940 |
// instruction that is commonly recognized as an idiom (has no register |
| 941 |
// dependency), so that's better/smaller than loading a splat 1 constant. |
941 |
// dependency), so that's better/smaller than loading a splat 1 constant. |
| 942 |
// |
942 |
// |
| 943 |
// But don't do this if it would inhibit a potentially profitable load |
943 |
// But don't do this if it would inhibit a potentially profitable load |
| 944 |
// folding opportunity for the other operand. That only occurs with the |
944 |
// folding opportunity for the other operand. That only occurs with the |
| 945 |
// intersection of: |
945 |
// intersection of: |
| 946 |
// (1) The other operand (op0) is load foldable. |
946 |
// (1) The other operand (op0) is load foldable. |
| 947 |
// (2) The op is an add (otherwise, we are *creating* an add and can still |
947 |
// (2) The op is an add (otherwise, we are *creating* an add and can still |
| 948 |
// load fold the other op). |
948 |
// load fold the other op). |
| 949 |
// (3) The target has AVX (otherwise, we have a destructive add and can't |
949 |
// (3) The target has AVX (otherwise, we have a destructive add and can't |
| 950 |
// load fold the other op without killing the constant op). |
950 |
// load fold the other op without killing the constant op). |
| 951 |
// (4) The constant 1 vector has multiple uses (so it is profitable to load |
951 |
// (4) The constant 1 vector has multiple uses (so it is profitable to load |
| 952 |
// into a register anyway). |
952 |
// into a register anyway). |
| 953 |
auto mayPreventLoadFold = [&]() { |
953 |
auto mayPreventLoadFold = [&]() { |
| 954 |
return X86::mayFoldLoad(N->getOperand(0), *Subtarget) && |
954 |
return X86::mayFoldLoad(N->getOperand(0), *Subtarget) && |
| 955 |
N->getOpcode() == ISD::ADD && Subtarget->hasAVX() && |
955 |
N->getOpcode() == ISD::ADD && Subtarget->hasAVX() && |
| 956 |
!N->getOperand(1).hasOneUse(); |
956 |
!N->getOperand(1).hasOneUse(); |
| 957 |
}; |
957 |
}; |
| 958 |
if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && |
958 |
if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && |
| 959 |
N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) { |
959 |
N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) { |
| 960 |
APInt SplatVal; |
960 |
APInt SplatVal; |
| 961 |
if (X86::isConstantSplat(N->getOperand(1), SplatVal) && |
961 |
if (X86::isConstantSplat(N->getOperand(1), SplatVal) && |
| 962 |
SplatVal.isOne()) { |
962 |
SplatVal.isOne()) { |
| 963 |
SDLoc DL(N); |
963 |
SDLoc DL(N); |
| 964 |
|
964 |
|
| 965 |
MVT VT = N->getSimpleValueType(0); |
965 |
MVT VT = N->getSimpleValueType(0); |
| 966 |
unsigned NumElts = VT.getSizeInBits() / 32; |
966 |
unsigned NumElts = VT.getSizeInBits() / 32; |
| 967 |
SDValue AllOnes = |
967 |
SDValue AllOnes = |
| 968 |
CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts)); |
968 |
CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts)); |
| 969 |
AllOnes = CurDAG->getBitcast(VT, AllOnes); |
969 |
AllOnes = CurDAG->getBitcast(VT, AllOnes); |
| 970 |
|
970 |
|
| 971 |
unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; |
971 |
unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; |
| 972 |
SDValue Res = |
972 |
SDValue Res = |
| 973 |
CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes); |
973 |
CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes); |
| 974 |
--I; |
974 |
--I; |
| 975 |
CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
975 |
CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
| 976 |
++I; |
976 |
++I; |
| 977 |
MadeChange = true; |
977 |
MadeChange = true; |
| 978 |
continue; |
978 |
continue; |
| 979 |
} |
979 |
} |
| 980 |
} |
980 |
} |
| 981 |
|
981 |
|
| 982 |
switch (N->getOpcode()) { |
982 |
switch (N->getOpcode()) { |
| 983 |
case X86ISD::VBROADCAST: { |
983 |
case X86ISD::VBROADCAST: { |
| 984 |
MVT VT = N->getSimpleValueType(0); |
984 |
MVT VT = N->getSimpleValueType(0); |
| 985 |
// Emulate v32i16/v64i8 broadcast without BWI. |
985 |
// Emulate v32i16/v64i8 broadcast without BWI. |
| 986 |
if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) { |
986 |
if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) { |
| 987 |
MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8; |
987 |
MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8; |
| 988 |
SDLoc dl(N); |
988 |
SDLoc dl(N); |
| 989 |
SDValue NarrowBCast = |
989 |
SDValue NarrowBCast = |
| 990 |
CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0)); |
990 |
CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0)); |
| 991 |
SDValue Res = |
991 |
SDValue Res = |
| 992 |
CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT), |
992 |
CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT), |
| 993 |
NarrowBCast, CurDAG->getIntPtrConstant(0, dl)); |
993 |
NarrowBCast, CurDAG->getIntPtrConstant(0, dl)); |
| 994 |
unsigned Index = VT == MVT::v32i16 ? 16 : 32; |
994 |
unsigned Index = VT == MVT::v32i16 ? 16 : 32; |
| 995 |
Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast, |
995 |
Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast, |
| 996 |
CurDAG->getIntPtrConstant(Index, dl)); |
996 |
CurDAG->getIntPtrConstant(Index, dl)); |
| 997 |
|
997 |
|
| 998 |
--I; |
998 |
--I; |
| 999 |
CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
999 |
CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
| 1000 |
++I; |
1000 |
++I; |
| 1001 |
MadeChange = true; |
1001 |
MadeChange = true; |
| 1002 |
continue; |
1002 |
continue; |
| 1003 |
} |
1003 |
} |
| 1004 |
|
1004 |
|
| 1005 |
break; |
1005 |
break; |
| 1006 |
} |
1006 |
} |
| 1007 |
case X86ISD::VBROADCAST_LOAD: { |
1007 |
case X86ISD::VBROADCAST_LOAD: { |
| 1008 |
MVT VT = N->getSimpleValueType(0); |
1008 |
MVT VT = N->getSimpleValueType(0); |
| 1009 |
// Emulate v32i16/v64i8 broadcast without BWI. |
1009 |
// Emulate v32i16/v64i8 broadcast without BWI. |
| 1010 |
if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) { |
1010 |
if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) { |
| 1011 |
MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8; |
1011 |
MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8; |
| 1012 |
auto *MemNode = cast(N); |
1012 |
auto *MemNode = cast(N); |
| 1013 |
SDLoc dl(N); |
1013 |
SDLoc dl(N); |
| 1014 |
SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other); |
1014 |
SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other); |
| 1015 |
SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()}; |
1015 |
SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()}; |
| 1016 |
SDValue NarrowBCast = CurDAG->getMemIntrinsicNode( |
1016 |
SDValue NarrowBCast = CurDAG->getMemIntrinsicNode( |
| 1017 |
X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(), |
1017 |
X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(), |
| 1018 |
MemNode->getMemOperand()); |
1018 |
MemNode->getMemOperand()); |
| 1019 |
SDValue Res = |
1019 |
SDValue Res = |
| 1020 |
CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT), |
1020 |
CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT), |
| 1021 |
NarrowBCast, CurDAG->getIntPtrConstant(0, dl)); |
1021 |
NarrowBCast, CurDAG->getIntPtrConstant(0, dl)); |
| 1022 |
unsigned Index = VT == MVT::v32i16 ? 16 : 32; |
1022 |
unsigned Index = VT == MVT::v32i16 ? 16 : 32; |
| 1023 |
Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast, |
1023 |
Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast, |
| 1024 |
CurDAG->getIntPtrConstant(Index, dl)); |
1024 |
CurDAG->getIntPtrConstant(Index, dl)); |
| 1025 |
|
1025 |
|
| 1026 |
--I; |
1026 |
--I; |
| 1027 |
SDValue To[] = {Res, NarrowBCast.getValue(1)}; |
1027 |
SDValue To[] = {Res, NarrowBCast.getValue(1)}; |
| 1028 |
CurDAG->ReplaceAllUsesWith(N, To); |
1028 |
CurDAG->ReplaceAllUsesWith(N, To); |
| 1029 |
++I; |
1029 |
++I; |
| 1030 |
MadeChange = true; |
1030 |
MadeChange = true; |
| 1031 |
continue; |
1031 |
continue; |
| 1032 |
} |
1032 |
} |
| 1033 |
|
1033 |
|
| 1034 |
break; |
1034 |
break; |
| 1035 |
} |
1035 |
} |
| 1036 |
case ISD::VSELECT: { |
1036 |
case ISD::VSELECT: { |
| 1037 |
// Replace VSELECT with non-mask conditions with with BLENDV/VPTERNLOG. |
1037 |
// Replace VSELECT with non-mask conditions with with BLENDV/VPTERNLOG. |
| 1038 |
EVT EleVT = N->getOperand(0).getValueType().getVectorElementType(); |
1038 |
EVT EleVT = N->getOperand(0).getValueType().getVectorElementType(); |
| 1039 |
if (EleVT == MVT::i1) |
1039 |
if (EleVT == MVT::i1) |
| 1040 |
break; |
1040 |
break; |
| 1041 |
|
1041 |
|
| 1042 |
assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); |
1042 |
assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); |
| 1043 |
assert(N->getValueType(0).getVectorElementType() != MVT::i16 && |
1043 |
assert(N->getValueType(0).getVectorElementType() != MVT::i16 && |
| 1044 |
"We can't replace VSELECT with BLENDV in vXi16!"); |
1044 |
"We can't replace VSELECT with BLENDV in vXi16!"); |
| 1045 |
SDValue R; |
1045 |
SDValue R; |
| 1046 |
if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) == |
1046 |
if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) == |
| 1047 |
EleVT.getSizeInBits()) { |
1047 |
EleVT.getSizeInBits()) { |
| 1048 |
R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0), |
1048 |
R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0), |
| 1049 |
N->getOperand(0), N->getOperand(1), N->getOperand(2), |
1049 |
N->getOperand(0), N->getOperand(1), N->getOperand(2), |
| 1050 |
CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8)); |
1050 |
CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8)); |
| 1051 |
} else { |
1051 |
} else { |
| 1052 |
R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), |
1052 |
R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), |
| 1053 |
N->getOperand(0), N->getOperand(1), |
1053 |
N->getOperand(0), N->getOperand(1), |
| 1054 |
N->getOperand(2)); |
1054 |
N->getOperand(2)); |
| 1055 |
} |
1055 |
} |
| 1056 |
--I; |
1056 |
--I; |
| 1057 |
CurDAG->ReplaceAllUsesWith(N, R.getNode()); |
1057 |
CurDAG->ReplaceAllUsesWith(N, R.getNode()); |
| 1058 |
++I; |
1058 |
++I; |
| 1059 |
MadeChange = true; |
1059 |
MadeChange = true; |
| 1060 |
continue; |
1060 |
continue; |
| 1061 |
} |
1061 |
} |
| 1062 |
case ISD::FP_ROUND: |
1062 |
case ISD::FP_ROUND: |
| 1063 |
case ISD::STRICT_FP_ROUND: |
1063 |
case ISD::STRICT_FP_ROUND: |
| 1064 |
case ISD::FP_TO_SINT: |
1064 |
case ISD::FP_TO_SINT: |
| 1065 |
case ISD::FP_TO_UINT: |
1065 |
case ISD::FP_TO_UINT: |
| 1066 |
case ISD::STRICT_FP_TO_SINT: |
1066 |
case ISD::STRICT_FP_TO_SINT: |
| 1067 |
case ISD::STRICT_FP_TO_UINT: { |
1067 |
case ISD::STRICT_FP_TO_UINT: { |
| 1068 |
// Replace vector fp_to_s/uint with their X86 specific equivalent so we |
1068 |
// Replace vector fp_to_s/uint with their X86 specific equivalent so we |
| 1069 |
// don't need 2 sets of patterns. |
1069 |
// don't need 2 sets of patterns. |
| 1070 |
if (!N->getSimpleValueType(0).isVector()) |
1070 |
if (!N->getSimpleValueType(0).isVector()) |
| 1071 |
break; |
1071 |
break; |
| 1072 |
|
1072 |
|
| 1073 |
unsigned NewOpc; |
1073 |
unsigned NewOpc; |
| 1074 |
switch (N->getOpcode()) { |
1074 |
switch (N->getOpcode()) { |
| 1075 |
default: llvm_unreachable("Unexpected opcode!"); |
1075 |
default: llvm_unreachable("Unexpected opcode!"); |
| 1076 |
case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break; |
1076 |
case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break; |
| 1077 |
case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break; |
1077 |
case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break; |
| 1078 |
case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break; |
1078 |
case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break; |
| 1079 |
case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break; |
1079 |
case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break; |
| 1080 |
case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break; |
1080 |
case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break; |
| 1081 |
case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break; |
1081 |
case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break; |
| 1082 |
} |
1082 |
} |
| 1083 |
SDValue Res; |
1083 |
SDValue Res; |
| 1084 |
if (N->isStrictFPOpcode()) |
1084 |
if (N->isStrictFPOpcode()) |
| 1085 |
Res = |
1085 |
Res = |
| 1086 |
CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other}, |
1086 |
CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other}, |
| 1087 |
{N->getOperand(0), N->getOperand(1)}); |
1087 |
{N->getOperand(0), N->getOperand(1)}); |
| 1088 |
else |
1088 |
else |
| 1089 |
Res = |
1089 |
Res = |
| 1090 |
CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), |
1090 |
CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), |
| 1091 |
N->getOperand(0)); |
1091 |
N->getOperand(0)); |
| 1092 |
--I; |
1092 |
--I; |
| 1093 |
CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
1093 |
CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
| 1094 |
++I; |
1094 |
++I; |
| 1095 |
MadeChange = true; |
1095 |
MadeChange = true; |
| 1096 |
continue; |
1096 |
continue; |
| 1097 |
} |
1097 |
} |
| 1098 |
case ISD::SHL: |
1098 |
case ISD::SHL: |
| 1099 |
case ISD::SRA: |
1099 |
case ISD::SRA: |
| 1100 |
case ISD::SRL: { |
1100 |
case ISD::SRL: { |
| 1101 |
// Replace vector shifts with their X86 specific equivalent so we don't |
1101 |
// Replace vector shifts with their X86 specific equivalent so we don't |
| 1102 |
// need 2 sets of patterns. |
1102 |
// need 2 sets of patterns. |
| 1103 |
if (!N->getValueType(0).isVector()) |
1103 |
if (!N->getValueType(0).isVector()) |
| 1104 |
break; |
1104 |
break; |
| 1105 |
|
1105 |
|
| 1106 |
unsigned NewOpc; |
1106 |
unsigned NewOpc; |
| 1107 |
switch (N->getOpcode()) { |
1107 |
switch (N->getOpcode()) { |
| 1108 |
default: llvm_unreachable("Unexpected opcode!"); |
1108 |
default: llvm_unreachable("Unexpected opcode!"); |
| 1109 |
case ISD::SHL: NewOpc = X86ISD::VSHLV; break; |
1109 |
case ISD::SHL: NewOpc = X86ISD::VSHLV; break; |
| 1110 |
case ISD::SRA: NewOpc = X86ISD::VSRAV; break; |
1110 |
case ISD::SRA: NewOpc = X86ISD::VSRAV; break; |
| 1111 |
case ISD::SRL: NewOpc = X86ISD::VSRLV; break; |
1111 |
case ISD::SRL: NewOpc = X86ISD::VSRLV; break; |
| 1112 |
} |
1112 |
} |
| 1113 |
SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), |
1113 |
SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), |
| 1114 |
N->getOperand(0), N->getOperand(1)); |
1114 |
N->getOperand(0), N->getOperand(1)); |
| 1115 |
--I; |
1115 |
--I; |
| 1116 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
1116 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
| 1117 |
++I; |
1117 |
++I; |
| 1118 |
MadeChange = true; |
1118 |
MadeChange = true; |
| 1119 |
continue; |
1119 |
continue; |
| 1120 |
} |
1120 |
} |
| 1121 |
case ISD::ANY_EXTEND: |
1121 |
case ISD::ANY_EXTEND: |
| 1122 |
case ISD::ANY_EXTEND_VECTOR_INREG: { |
1122 |
case ISD::ANY_EXTEND_VECTOR_INREG: { |
| 1123 |
// Replace vector any extend with the zero extend equivalents so we don't |
1123 |
// Replace vector any extend with the zero extend equivalents so we don't |
| 1124 |
// need 2 sets of patterns. Ignore vXi1 extensions. |
1124 |
// need 2 sets of patterns. Ignore vXi1 extensions. |
| 1125 |
if (!N->getValueType(0).isVector()) |
1125 |
if (!N->getValueType(0).isVector()) |
| 1126 |
break; |
1126 |
break; |
| 1127 |
|
1127 |
|
| 1128 |
unsigned NewOpc; |
1128 |
unsigned NewOpc; |
| 1129 |
if (N->getOperand(0).getScalarValueSizeInBits() == 1) { |
1129 |
if (N->getOperand(0).getScalarValueSizeInBits() == 1) { |
| 1130 |
assert(N->getOpcode() == ISD::ANY_EXTEND && |
1130 |
assert(N->getOpcode() == ISD::ANY_EXTEND && |
| 1131 |
"Unexpected opcode for mask vector!"); |
1131 |
"Unexpected opcode for mask vector!"); |
| 1132 |
NewOpc = ISD::SIGN_EXTEND; |
1132 |
NewOpc = ISD::SIGN_EXTEND; |
| 1133 |
} else { |
1133 |
} else { |
| 1134 |
NewOpc = N->getOpcode() == ISD::ANY_EXTEND |
1134 |
NewOpc = N->getOpcode() == ISD::ANY_EXTEND |
| 1135 |
? ISD::ZERO_EXTEND |
1135 |
? ISD::ZERO_EXTEND |
| 1136 |
: ISD::ZERO_EXTEND_VECTOR_INREG; |
1136 |
: ISD::ZERO_EXTEND_VECTOR_INREG; |
| 1137 |
} |
1137 |
} |
| 1138 |
|
1138 |
|
| 1139 |
SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), |
1139 |
SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), |
| 1140 |
N->getOperand(0)); |
1140 |
N->getOperand(0)); |
| 1141 |
--I; |
1141 |
--I; |
| 1142 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
1142 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
| 1143 |
++I; |
1143 |
++I; |
| 1144 |
MadeChange = true; |
1144 |
MadeChange = true; |
| 1145 |
continue; |
1145 |
continue; |
| 1146 |
} |
1146 |
} |
| 1147 |
case ISD::FCEIL: |
1147 |
case ISD::FCEIL: |
| 1148 |
case ISD::STRICT_FCEIL: |
1148 |
case ISD::STRICT_FCEIL: |
| 1149 |
case ISD::FFLOOR: |
1149 |
case ISD::FFLOOR: |
| 1150 |
case ISD::STRICT_FFLOOR: |
1150 |
case ISD::STRICT_FFLOOR: |
| 1151 |
case ISD::FTRUNC: |
1151 |
case ISD::FTRUNC: |
| 1152 |
case ISD::STRICT_FTRUNC: |
1152 |
case ISD::STRICT_FTRUNC: |
| 1153 |
case ISD::FROUNDEVEN: |
1153 |
case ISD::FROUNDEVEN: |
| 1154 |
case ISD::STRICT_FROUNDEVEN: |
1154 |
case ISD::STRICT_FROUNDEVEN: |
| 1155 |
case ISD::FNEARBYINT: |
1155 |
case ISD::FNEARBYINT: |
| 1156 |
case ISD::STRICT_FNEARBYINT: |
1156 |
case ISD::STRICT_FNEARBYINT: |
| 1157 |
case ISD::FRINT: |
1157 |
case ISD::FRINT: |
| 1158 |
case ISD::STRICT_FRINT: { |
1158 |
case ISD::STRICT_FRINT: { |
| 1159 |
// Replace fp rounding with their X86 specific equivalent so we don't |
1159 |
// Replace fp rounding with their X86 specific equivalent so we don't |
| 1160 |
// need 2 sets of patterns. |
1160 |
// need 2 sets of patterns. |
| 1161 |
unsigned Imm; |
1161 |
unsigned Imm; |
| 1162 |
switch (N->getOpcode()) { |
1162 |
switch (N->getOpcode()) { |
| 1163 |
default: llvm_unreachable("Unexpected opcode!"); |
1163 |
default: llvm_unreachable("Unexpected opcode!"); |
| 1164 |
case ISD::STRICT_FCEIL: |
1164 |
case ISD::STRICT_FCEIL: |
| 1165 |
case ISD::FCEIL: Imm = 0xA; break; |
1165 |
case ISD::FCEIL: Imm = 0xA; break; |
| 1166 |
case ISD::STRICT_FFLOOR: |
1166 |
case ISD::STRICT_FFLOOR: |
| 1167 |
case ISD::FFLOOR: Imm = 0x9; break; |
1167 |
case ISD::FFLOOR: Imm = 0x9; break; |
| 1168 |
case ISD::STRICT_FTRUNC: |
1168 |
case ISD::STRICT_FTRUNC: |
| 1169 |
case ISD::FTRUNC: Imm = 0xB; break; |
1169 |
case ISD::FTRUNC: Imm = 0xB; break; |
| 1170 |
case ISD::STRICT_FROUNDEVEN: |
1170 |
case ISD::STRICT_FROUNDEVEN: |
| 1171 |
case ISD::FROUNDEVEN: Imm = 0x8; break; |
1171 |
case ISD::FROUNDEVEN: Imm = 0x8; break; |
| 1172 |
case ISD::STRICT_FNEARBYINT: |
1172 |
case ISD::STRICT_FNEARBYINT: |
| 1173 |
case ISD::FNEARBYINT: Imm = 0xC; break; |
1173 |
case ISD::FNEARBYINT: Imm = 0xC; break; |
| 1174 |
case ISD::STRICT_FRINT: |
1174 |
case ISD::STRICT_FRINT: |
| 1175 |
case ISD::FRINT: Imm = 0x4; break; |
1175 |
case ISD::FRINT: Imm = 0x4; break; |
| 1176 |
} |
1176 |
} |
| 1177 |
SDLoc dl(N); |
1177 |
SDLoc dl(N); |
| 1178 |
bool IsStrict = N->isStrictFPOpcode(); |
1178 |
bool IsStrict = N->isStrictFPOpcode(); |
| 1179 |
SDValue Res; |
1179 |
SDValue Res; |
| 1180 |
if (IsStrict) |
1180 |
if (IsStrict) |
| 1181 |
Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl, |
1181 |
Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl, |
| 1182 |
{N->getValueType(0), MVT::Other}, |
1182 |
{N->getValueType(0), MVT::Other}, |
| 1183 |
{N->getOperand(0), N->getOperand(1), |
1183 |
{N->getOperand(0), N->getOperand(1), |
| 1184 |
CurDAG->getTargetConstant(Imm, dl, MVT::i32)}); |
1184 |
CurDAG->getTargetConstant(Imm, dl, MVT::i32)}); |
| 1185 |
else |
1185 |
else |
| 1186 |
Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0), |
1186 |
Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0), |
| 1187 |
N->getOperand(0), |
1187 |
N->getOperand(0), |
| 1188 |
CurDAG->getTargetConstant(Imm, dl, MVT::i32)); |
1188 |
CurDAG->getTargetConstant(Imm, dl, MVT::i32)); |
| 1189 |
--I; |
1189 |
--I; |
| 1190 |
CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
1190 |
CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
| 1191 |
++I; |
1191 |
++I; |
| 1192 |
MadeChange = true; |
1192 |
MadeChange = true; |
| 1193 |
continue; |
1193 |
continue; |
| 1194 |
} |
1194 |
} |
| 1195 |
case X86ISD::FANDN: |
1195 |
case X86ISD::FANDN: |
| 1196 |
case X86ISD::FAND: |
1196 |
case X86ISD::FAND: |
| 1197 |
case X86ISD::FOR: |
1197 |
case X86ISD::FOR: |
| 1198 |
case X86ISD::FXOR: { |
1198 |
case X86ISD::FXOR: { |
| 1199 |
// Widen scalar fp logic ops to vector to reduce isel patterns. |
1199 |
// Widen scalar fp logic ops to vector to reduce isel patterns. |
| 1200 |
// FIXME: Can we do this during lowering/combine. |
1200 |
// FIXME: Can we do this during lowering/combine. |
| 1201 |
MVT VT = N->getSimpleValueType(0); |
1201 |
MVT VT = N->getSimpleValueType(0); |
| 1202 |
if (VT.isVector() || VT == MVT::f128) |
1202 |
if (VT.isVector() || VT == MVT::f128) |
| 1203 |
break; |
1203 |
break; |
| 1204 |
|
1204 |
|
| 1205 |
MVT VecVT = VT == MVT::f64 ? MVT::v2f64 |
1205 |
MVT VecVT = VT == MVT::f64 ? MVT::v2f64 |
| 1206 |
: VT == MVT::f32 ? MVT::v4f32 |
1206 |
: VT == MVT::f32 ? MVT::v4f32 |
| 1207 |
: MVT::v8f16; |
1207 |
: MVT::v8f16; |
| 1208 |
|
1208 |
|
| 1209 |
SDLoc dl(N); |
1209 |
SDLoc dl(N); |
| 1210 |
SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, |
1210 |
SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, |
| 1211 |
N->getOperand(0)); |
1211 |
N->getOperand(0)); |
| 1212 |
SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, |
1212 |
SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, |
| 1213 |
N->getOperand(1)); |
1213 |
N->getOperand(1)); |
| 1214 |
|
1214 |
|
| 1215 |
SDValue Res; |
1215 |
SDValue Res; |
| 1216 |
if (Subtarget->hasSSE2()) { |
1216 |
if (Subtarget->hasSSE2()) { |
| 1217 |
EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger(); |
1217 |
EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger(); |
| 1218 |
Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0); |
1218 |
Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0); |
| 1219 |
Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1); |
1219 |
Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1); |
| 1220 |
unsigned Opc; |
1220 |
unsigned Opc; |
| 1221 |
switch (N->getOpcode()) { |
1221 |
switch (N->getOpcode()) { |
| 1222 |
default: llvm_unreachable("Unexpected opcode!"); |
1222 |
default: llvm_unreachable("Unexpected opcode!"); |
| 1223 |
case X86ISD::FANDN: Opc = X86ISD::ANDNP; break; |
1223 |
case X86ISD::FANDN: Opc = X86ISD::ANDNP; break; |
| 1224 |
case X86ISD::FAND: Opc = ISD::AND; break; |
1224 |
case X86ISD::FAND: Opc = ISD::AND; break; |
| 1225 |
case X86ISD::FOR: Opc = ISD::OR; break; |
1225 |
case X86ISD::FOR: Opc = ISD::OR; break; |
| 1226 |
case X86ISD::FXOR: Opc = ISD::XOR; break; |
1226 |
case X86ISD::FXOR: Opc = ISD::XOR; break; |
| 1227 |
} |
1227 |
} |
| 1228 |
Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1); |
1228 |
Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1); |
| 1229 |
Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res); |
1229 |
Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res); |
| 1230 |
} else { |
1230 |
} else { |
| 1231 |
Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1); |
1231 |
Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1); |
| 1232 |
} |
1232 |
} |
| 1233 |
Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, |
1233 |
Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, |
| 1234 |
CurDAG->getIntPtrConstant(0, dl)); |
1234 |
CurDAG->getIntPtrConstant(0, dl)); |
| 1235 |
--I; |
1235 |
--I; |
| 1236 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
1236 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
| 1237 |
++I; |
1237 |
++I; |
| 1238 |
MadeChange = true; |
1238 |
MadeChange = true; |
| 1239 |
continue; |
1239 |
continue; |
| 1240 |
} |
1240 |
} |
| 1241 |
} |
1241 |
} |
| 1242 |
|
1242 |
|
| 1243 |
if (OptLevel != CodeGenOpt::None && |
1243 |
if (OptLevel != CodeGenOpt::None && |
| 1244 |
// Only do this when the target can fold the load into the call or |
1244 |
// Only do this when the target can fold the load into the call or |
| 1245 |
// jmp. |
1245 |
// jmp. |
| 1246 |
!Subtarget->useIndirectThunkCalls() && |
1246 |
!Subtarget->useIndirectThunkCalls() && |
| 1247 |
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || |
1247 |
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || |
| 1248 |
(N->getOpcode() == X86ISD::TC_RETURN && |
1248 |
(N->getOpcode() == X86ISD::TC_RETURN && |
| 1249 |
(Subtarget->is64Bit() || |
1249 |
(Subtarget->is64Bit() || |
| 1250 |
!getTargetMachine().isPositionIndependent())))) { |
1250 |
!getTargetMachine().isPositionIndependent())))) { |
| 1251 |
/// Also try moving call address load from outside callseq_start to just |
1251 |
/// Also try moving call address load from outside callseq_start to just |
| 1252 |
/// before the call to allow it to be folded. |
1252 |
/// before the call to allow it to be folded. |
| 1253 |
/// |
1253 |
/// |
| 1254 |
/// [Load chain] |
1254 |
/// [Load chain] |
| 1255 |
/// ^ |
1255 |
/// ^ |
| 1256 |
/// | |
1256 |
/// | |
| 1257 |
/// [Load] |
1257 |
/// [Load] |
| 1258 |
/// ^ ^ |
1258 |
/// ^ ^ |
| 1259 |
/// | | |
1259 |
/// | | |
| 1260 |
/// / \-- |
1260 |
/// / \-- |
| 1261 |
/// / | |
1261 |
/// / | |
| 1262 |
///[CALLSEQ_START] | |
1262 |
///[CALLSEQ_START] | |
| 1263 |
/// ^ | |
1263 |
/// ^ | |
| 1264 |
/// | | |
1264 |
/// | | |
| 1265 |
/// [LOAD/C2Reg] | |
1265 |
/// [LOAD/C2Reg] | |
| 1266 |
/// | | |
1266 |
/// | | |
| 1267 |
/// \ / |
1267 |
/// \ / |
| 1268 |
/// \ / |
1268 |
/// \ / |
| 1269 |
/// [CALL] |
1269 |
/// [CALL] |
| 1270 |
bool HasCallSeq = N->getOpcode() == X86ISD::CALL; |
1270 |
bool HasCallSeq = N->getOpcode() == X86ISD::CALL; |
| 1271 |
SDValue Chain = N->getOperand(0); |
1271 |
SDValue Chain = N->getOperand(0); |
| 1272 |
SDValue Load = N->getOperand(1); |
1272 |
SDValue Load = N->getOperand(1); |
| 1273 |
if (!isCalleeLoad(Load, Chain, HasCallSeq)) |
1273 |
if (!isCalleeLoad(Load, Chain, HasCallSeq)) |
| 1274 |
continue; |
1274 |
continue; |
| 1275 |
moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); |
1275 |
moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); |
| 1276 |
++NumLoadMoved; |
1276 |
++NumLoadMoved; |
| 1277 |
MadeChange = true; |
1277 |
MadeChange = true; |
| 1278 |
continue; |
1278 |
continue; |
| 1279 |
} |
1279 |
} |
| 1280 |
|
1280 |
|
| 1281 |
// Lower fpround and fpextend nodes that target the FP stack to be store and |
1281 |
// Lower fpround and fpextend nodes that target the FP stack to be store and |
| 1282 |
// load to the stack. This is a gross hack. We would like to simply mark |
1282 |
// load to the stack. This is a gross hack. We would like to simply mark |
| 1283 |
// these as being illegal, but when we do that, legalize produces these when |
1283 |
// these as being illegal, but when we do that, legalize produces these when |
| 1284 |
// it expands calls, then expands these in the same legalize pass. We would |
1284 |
// it expands calls, then expands these in the same legalize pass. We would |
| 1285 |
// like dag combine to be able to hack on these between the call expansion |
1285 |
// like dag combine to be able to hack on these between the call expansion |
| 1286 |
// and the node legalization. As such this pass basically does "really |
1286 |
// and the node legalization. As such this pass basically does "really |
| 1287 |
// late" legalization of these inline with the X86 isel pass. |
1287 |
// late" legalization of these inline with the X86 isel pass. |
| 1288 |
// FIXME: This should only happen when not compiled with -O0. |
1288 |
// FIXME: This should only happen when not compiled with -O0. |
| 1289 |
switch (N->getOpcode()) { |
1289 |
switch (N->getOpcode()) { |
| 1290 |
default: continue; |
1290 |
default: continue; |
| 1291 |
case ISD::FP_ROUND: |
1291 |
case ISD::FP_ROUND: |
| 1292 |
case ISD::FP_EXTEND: |
1292 |
case ISD::FP_EXTEND: |
| 1293 |
{ |
1293 |
{ |
| 1294 |
MVT SrcVT = N->getOperand(0).getSimpleValueType(); |
1294 |
MVT SrcVT = N->getOperand(0).getSimpleValueType(); |
| 1295 |
MVT DstVT = N->getSimpleValueType(0); |
1295 |
MVT DstVT = N->getSimpleValueType(0); |
| 1296 |
|
1296 |
|
| 1297 |
// If any of the sources are vectors, no fp stack involved. |
1297 |
// If any of the sources are vectors, no fp stack involved. |
| 1298 |
if (SrcVT.isVector() || DstVT.isVector()) |
1298 |
if (SrcVT.isVector() || DstVT.isVector()) |
| 1299 |
continue; |
1299 |
continue; |
| 1300 |
|
1300 |
|
| 1301 |
// If the source and destination are SSE registers, then this is a legal |
1301 |
// If the source and destination are SSE registers, then this is a legal |
| 1302 |
// conversion that should not be lowered. |
1302 |
// conversion that should not be lowered. |
| 1303 |
const X86TargetLowering *X86Lowering = |
1303 |
const X86TargetLowering *X86Lowering = |
| 1304 |
static_cast(TLI); |
1304 |
static_cast(TLI); |
| 1305 |
bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); |
1305 |
bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); |
| 1306 |
bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); |
1306 |
bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); |
| 1307 |
if (SrcIsSSE && DstIsSSE) |
1307 |
if (SrcIsSSE && DstIsSSE) |
| 1308 |
continue; |
1308 |
continue; |
| 1309 |
|
1309 |
|
| 1310 |
if (!SrcIsSSE && !DstIsSSE) { |
1310 |
if (!SrcIsSSE && !DstIsSSE) { |
| 1311 |
// If this is an FPStack extension, it is a noop. |
1311 |
// If this is an FPStack extension, it is a noop. |
| 1312 |
if (N->getOpcode() == ISD::FP_EXTEND) |
1312 |
if (N->getOpcode() == ISD::FP_EXTEND) |
| 1313 |
continue; |
1313 |
continue; |
| 1314 |
// If this is a value-preserving FPStack truncation, it is a noop. |
1314 |
// If this is a value-preserving FPStack truncation, it is a noop. |
| 1315 |
if (N->getConstantOperandVal(1)) |
1315 |
if (N->getConstantOperandVal(1)) |
| 1316 |
continue; |
1316 |
continue; |
| 1317 |
} |
1317 |
} |
| 1318 |
|
1318 |
|
| 1319 |
// Here we could have an FP stack truncation or an FPStack <-> SSE convert. |
1319 |
// Here we could have an FP stack truncation or an FPStack <-> SSE convert. |
| 1320 |
// FPStack has extload and truncstore. SSE can fold direct loads into other |
1320 |
// FPStack has extload and truncstore. SSE can fold direct loads into other |
| 1321 |
// operations. Based on this, decide what we want to do. |
1321 |
// operations. Based on this, decide what we want to do. |
| 1322 |
MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT; |
1322 |
MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT; |
| 1323 |
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); |
1323 |
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); |
| 1324 |
int SPFI = cast(MemTmp)->getIndex(); |
1324 |
int SPFI = cast(MemTmp)->getIndex(); |
| 1325 |
MachinePointerInfo MPI = |
1325 |
MachinePointerInfo MPI = |
| 1326 |
MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); |
1326 |
MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); |
| 1327 |
SDLoc dl(N); |
1327 |
SDLoc dl(N); |
| 1328 |
|
1328 |
|
| 1329 |
// FIXME: optimize the case where the src/dest is a load or store? |
1329 |
// FIXME: optimize the case where the src/dest is a load or store? |
| 1330 |
|
1330 |
|
| 1331 |
SDValue Store = CurDAG->getTruncStore( |
1331 |
SDValue Store = CurDAG->getTruncStore( |
| 1332 |
CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT); |
1332 |
CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT); |
| 1333 |
SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, |
1333 |
SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, |
| 1334 |
MemTmp, MPI, MemVT); |
1334 |
MemTmp, MPI, MemVT); |
| 1335 |
|
1335 |
|
| 1336 |
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the |
1336 |
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the |
| 1337 |
// extload we created. This will cause general havok on the dag because |
1337 |
// extload we created. This will cause general havok on the dag because |
| 1338 |
// anything below the conversion could be folded into other existing nodes. |
1338 |
// anything below the conversion could be folded into other existing nodes. |
| 1339 |
// To avoid invalidating 'I', back it up to the convert node. |
1339 |
// To avoid invalidating 'I', back it up to the convert node. |
| 1340 |
--I; |
1340 |
--I; |
| 1341 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); |
1341 |
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); |
| 1342 |
break; |
1342 |
break; |
| 1343 |
} |
1343 |
} |
| 1344 |
|
1344 |
|
| 1345 |
//The sequence of events for lowering STRICT_FP versions of these nodes requires |
1345 |
//The sequence of events for lowering STRICT_FP versions of these nodes requires |
| 1346 |
//dealing with the chain differently, as there is already a preexisting chain. |
1346 |
//dealing with the chain differently, as there is already a preexisting chain. |
| 1347 |
case ISD::STRICT_FP_ROUND: |
1347 |
case ISD::STRICT_FP_ROUND: |
| 1348 |
case ISD::STRICT_FP_EXTEND: |
1348 |
case ISD::STRICT_FP_EXTEND: |
| 1349 |
{ |
1349 |
{ |
| 1350 |
MVT SrcVT = N->getOperand(1).getSimpleValueType(); |
1350 |
MVT SrcVT = N->getOperand(1).getSimpleValueType(); |
| 1351 |
MVT DstVT = N->getSimpleValueType(0); |
1351 |
MVT DstVT = N->getSimpleValueType(0); |
| 1352 |
|
1352 |
|
| 1353 |
// If any of the sources are vectors, no fp stack involved. |
1353 |
// If any of the sources are vectors, no fp stack involved. |
| 1354 |
if (SrcVT.isVector() || DstVT.isVector()) |
1354 |
if (SrcVT.isVector() || DstVT.isVector()) |
| 1355 |
continue; |
1355 |
continue; |
| 1356 |
|
1356 |
|
| 1357 |
// If the source and destination are SSE registers, then this is a legal |
1357 |
// If the source and destination are SSE registers, then this is a legal |
| 1358 |
// conversion that should not be lowered. |
1358 |
// conversion that should not be lowered. |
| 1359 |
const X86TargetLowering *X86Lowering = |
1359 |
const X86TargetLowering *X86Lowering = |
| 1360 |
static_cast(TLI); |
1360 |
static_cast(TLI); |
| 1361 |
bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); |
1361 |
bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); |
| 1362 |
bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); |
1362 |
bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); |
| 1363 |
if (SrcIsSSE && DstIsSSE) |
1363 |
if (SrcIsSSE && DstIsSSE) |
| 1364 |
continue; |
1364 |
continue; |
| 1365 |
|
1365 |
|
| 1366 |
if (!SrcIsSSE && !DstIsSSE) { |
1366 |
if (!SrcIsSSE && !DstIsSSE) { |
| 1367 |
// If this is an FPStack extension, it is a noop. |
1367 |
// If this is an FPStack extension, it is a noop. |
| 1368 |
if (N->getOpcode() == ISD::STRICT_FP_EXTEND) |
1368 |
if (N->getOpcode() == ISD::STRICT_FP_EXTEND) |
| 1369 |
continue; |
1369 |
continue; |
| 1370 |
// If this is a value-preserving FPStack truncation, it is a noop. |
1370 |
// If this is a value-preserving FPStack truncation, it is a noop. |
| 1371 |
if (N->getConstantOperandVal(2)) |
1371 |
if (N->getConstantOperandVal(2)) |
| 1372 |
continue; |
1372 |
continue; |
| 1373 |
} |
1373 |
} |
| 1374 |
|
1374 |
|
| 1375 |
// Here we could have an FP stack truncation or an FPStack <-> SSE convert. |
1375 |
// Here we could have an FP stack truncation or an FPStack <-> SSE convert. |
| 1376 |
// FPStack has extload and truncstore. SSE can fold direct loads into other |
1376 |
// FPStack has extload and truncstore. SSE can fold direct loads into other |
| 1377 |
// operations. Based on this, decide what we want to do. |
1377 |
// operations. Based on this, decide what we want to do. |
| 1378 |
MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT; |
1378 |
MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT; |
| 1379 |
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); |
1379 |
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); |
| 1380 |
int SPFI = cast(MemTmp)->getIndex(); |
1380 |
int SPFI = cast(MemTmp)->getIndex(); |
| 1381 |
MachinePointerInfo MPI = |
1381 |
MachinePointerInfo MPI = |
| 1382 |
MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); |
1382 |
MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); |
| 1383 |
SDLoc dl(N); |
1383 |
SDLoc dl(N); |
| 1384 |
|
1384 |
|
| 1385 |
// FIXME: optimize the case where the src/dest is a load or store? |
1385 |
// FIXME: optimize the case where the src/dest is a load or store? |
| 1386 |
|
1386 |
|
| 1387 |
//Since the operation is StrictFP, use the preexisting chain. |
1387 |
//Since the operation is StrictFP, use the preexisting chain. |
| 1388 |
SDValue Store, Result; |
1388 |
SDValue Store, Result; |
| 1389 |
if (!SrcIsSSE) { |
1389 |
if (!SrcIsSSE) { |
| 1390 |
SDVTList VTs = CurDAG->getVTList(MVT::Other); |
1390 |
SDVTList VTs = CurDAG->getVTList(MVT::Other); |
| 1391 |
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp}; |
1391 |
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp}; |
| 1392 |
Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT, |
1392 |
Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT, |
| 1393 |
MPI, /*Align*/ std::nullopt, |
1393 |
MPI, /*Align*/ std::nullopt, |
| 1394 |
MachineMemOperand::MOStore); |
1394 |
MachineMemOperand::MOStore); |
| 1395 |
if (N->getFlags().hasNoFPExcept()) { |
1395 |
if (N->getFlags().hasNoFPExcept()) { |
| 1396 |
SDNodeFlags Flags = Store->getFlags(); |
1396 |
SDNodeFlags Flags = Store->getFlags(); |
| 1397 |
Flags.setNoFPExcept(true); |
1397 |
Flags.setNoFPExcept(true); |
| 1398 |
Store->setFlags(Flags); |
1398 |
Store->setFlags(Flags); |
| 1399 |
} |
1399 |
} |
| 1400 |
} else { |
1400 |
} else { |
| 1401 |
assert(SrcVT == MemVT && "Unexpected VT!"); |
1401 |
assert(SrcVT == MemVT && "Unexpected VT!"); |
| 1402 |
Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp, |
1402 |
Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp, |
| 1403 |
MPI); |
1403 |
MPI); |
| 1404 |
} |
1404 |
} |
| 1405 |
|
1405 |
|
| 1406 |
if (!DstIsSSE) { |
1406 |
if (!DstIsSSE) { |
| 1407 |
SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other); |
1407 |
SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other); |
| 1408 |
SDValue Ops[] = {Store, MemTmp}; |
1408 |
SDValue Ops[] = {Store, MemTmp}; |
| 1409 |
Result = CurDAG->getMemIntrinsicNode( |
1409 |
Result = CurDAG->getMemIntrinsicNode( |
| 1410 |
X86ISD::FLD, dl, VTs, Ops, MemVT, MPI, |
1410 |
X86ISD::FLD, dl, VTs, Ops, MemVT, MPI, |
| 1411 |
/*Align*/ std::nullopt, MachineMemOperand::MOLoad); |
1411 |
/*Align*/ std::nullopt, MachineMemOperand::MOLoad); |
| 1412 |
if (N->getFlags().hasNoFPExcept()) { |
1412 |
if (N->getFlags().hasNoFPExcept()) { |
| 1413 |
SDNodeFlags Flags = Result->getFlags(); |
1413 |
SDNodeFlags Flags = Result->getFlags(); |
| 1414 |
Flags.setNoFPExcept(true); |
1414 |
Flags.setNoFPExcept(true); |
| 1415 |
Result->setFlags(Flags); |
1415 |
Result->setFlags(Flags); |
| 1416 |
} |
1416 |
} |
| 1417 |
} else { |
1417 |
} else { |
| 1418 |
assert(DstVT == MemVT && "Unexpected VT!"); |
1418 |
assert(DstVT == MemVT && "Unexpected VT!"); |
| 1419 |
Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI); |
1419 |
Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI); |
| 1420 |
} |
1420 |
} |
| 1421 |
|
1421 |
|
| 1422 |
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the |
1422 |
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the |
| 1423 |
// extload we created. This will cause general havok on the dag because |
1423 |
// extload we created. This will cause general havok on the dag because |
| 1424 |
// anything below the conversion could be folded into other existing nodes. |
1424 |
// anything below the conversion could be folded into other existing nodes. |
| 1425 |
// To avoid invalidating 'I', back it up to the convert node. |
1425 |
// To avoid invalidating 'I', back it up to the convert node. |
| 1426 |
--I; |
1426 |
--I; |
| 1427 |
CurDAG->ReplaceAllUsesWith(N, Result.getNode()); |
1427 |
CurDAG->ReplaceAllUsesWith(N, Result.getNode()); |
| 1428 |
break; |
1428 |
break; |
| 1429 |
} |
1429 |
} |
| 1430 |
} |
1430 |
} |
| 1431 |
|
1431 |
|
| 1432 |
|
1432 |
|
| 1433 |
// Now that we did that, the node is dead. Increment the iterator to the |
1433 |
// Now that we did that, the node is dead. Increment the iterator to the |
| 1434 |
// next node to process, then delete N. |
1434 |
// next node to process, then delete N. |
| 1435 |
++I; |
1435 |
++I; |
| 1436 |
MadeChange = true; |
1436 |
MadeChange = true; |
| 1437 |
} |
1437 |
} |
| 1438 |
|
1438 |
|
| 1439 |
// Remove any dead nodes that may have been left behind. |
1439 |
// Remove any dead nodes that may have been left behind. |
| 1440 |
if (MadeChange) |
1440 |
if (MadeChange) |
| 1441 |
CurDAG->RemoveDeadNodes(); |
1441 |
CurDAG->RemoveDeadNodes(); |
| 1442 |
} |
1442 |
} |
| 1443 |
|
1443 |
|
| 1444 |
// Look for a redundant movzx/movsx that can occur after an 8-bit divrem. |
1444 |
// Look for a redundant movzx/movsx that can occur after an 8-bit divrem. |
| 1445 |
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) { |
1445 |
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) { |
| 1446 |
unsigned Opc = N->getMachineOpcode(); |
1446 |
unsigned Opc = N->getMachineOpcode(); |
| 1447 |
if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 && |
1447 |
if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 && |
| 1448 |
Opc != X86::MOVSX64rr8) |
1448 |
Opc != X86::MOVSX64rr8) |
| 1449 |
return false; |
1449 |
return false; |
| 1450 |
|
1450 |
|
| 1451 |
SDValue N0 = N->getOperand(0); |
1451 |
SDValue N0 = N->getOperand(0); |
| 1452 |
|
1452 |
|
| 1453 |
// We need to be extracting the lower bit of an extend. |
1453 |
// We need to be extracting the lower bit of an extend. |
| 1454 |
if (!N0.isMachineOpcode() || |
1454 |
if (!N0.isMachineOpcode() || |
| 1455 |
N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG || |
1455 |
N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG || |
| 1456 |
N0.getConstantOperandVal(1) != X86::sub_8bit) |
1456 |
N0.getConstantOperandVal(1) != X86::sub_8bit) |
| 1457 |
return false; |
1457 |
return false; |
| 1458 |
|
1458 |
|
| 1459 |
// We're looking for either a movsx or movzx to match the original opcode. |
1459 |
// We're looking for either a movsx or movzx to match the original opcode. |
| 1460 |
unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX |
1460 |
unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX |
| 1461 |
: X86::MOVSX32rr8_NOREX; |
1461 |
: X86::MOVSX32rr8_NOREX; |
| 1462 |
SDValue N00 = N0.getOperand(0); |
1462 |
SDValue N00 = N0.getOperand(0); |
| 1463 |
if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc) |
1463 |
if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc) |
| 1464 |
return false; |
1464 |
return false; |
| 1465 |
|
1465 |
|
| 1466 |
if (Opc == X86::MOVSX64rr8) { |
1466 |
if (Opc == X86::MOVSX64rr8) { |
| 1467 |
// If we had a sign extend from 8 to 64 bits. We still need to go from 32 |
1467 |
// If we had a sign extend from 8 to 64 bits. We still need to go from 32 |
| 1468 |
// to 64. |
1468 |
// to 64. |
| 1469 |
MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N), |
1469 |
MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N), |
| 1470 |
MVT::i64, N00); |
1470 |
MVT::i64, N00); |
| 1471 |
ReplaceUses(N, Extend); |
1471 |
ReplaceUses(N, Extend); |
| 1472 |
} else { |
1472 |
} else { |
| 1473 |
// Ok we can drop this extend and just use the original extend. |
1473 |
// Ok we can drop this extend and just use the original extend. |
| 1474 |
ReplaceUses(N, N00.getNode()); |
1474 |
ReplaceUses(N, N00.getNode()); |
| 1475 |
} |
1475 |
} |
| 1476 |
|
1476 |
|
| 1477 |
return true; |
1477 |
return true; |
| 1478 |
} |
1478 |
} |
| 1479 |
|
1479 |
|
| 1480 |
void X86DAGToDAGISel::PostprocessISelDAG() { |
1480 |
void X86DAGToDAGISel::PostprocessISelDAG() { |
| 1481 |
// Skip peepholes at -O0. |
1481 |
// Skip peepholes at -O0. |
| 1482 |
if (TM.getOptLevel() == CodeGenOpt::None) |
1482 |
if (TM.getOptLevel() == CodeGenOpt::None) |
| 1483 |
return; |
1483 |
return; |
| 1484 |
|
1484 |
|
| 1485 |
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
1485 |
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
| 1486 |
|
1486 |
|
| 1487 |
bool MadeChange = false; |
1487 |
bool MadeChange = false; |
| 1488 |
while (Position != CurDAG->allnodes_begin()) { |
1488 |
while (Position != CurDAG->allnodes_begin()) { |
| 1489 |
SDNode *N = &*--Position; |
1489 |
SDNode *N = &*--Position; |
| 1490 |
// Skip dead nodes and any non-machine opcodes. |
1490 |
// Skip dead nodes and any non-machine opcodes. |
| 1491 |
if (N->use_empty() || !N->isMachineOpcode()) |
1491 |
if (N->use_empty() || !N->isMachineOpcode()) |
| 1492 |
continue; |
1492 |
continue; |
| 1493 |
|
1493 |
|
| 1494 |
if (tryOptimizeRem8Extend(N)) { |
1494 |
if (tryOptimizeRem8Extend(N)) { |
| 1495 |
MadeChange = true; |
1495 |
MadeChange = true; |
| 1496 |
continue; |
1496 |
continue; |
| 1497 |
} |
1497 |
} |
| 1498 |
|
1498 |
|
| 1499 |
// Look for a TESTrr+ANDrr pattern where both operands of the test are |
1499 |
// Look for a TESTrr+ANDrr pattern where both operands of the test are |
| 1500 |
// the same. Rewrite to remove the AND. |
1500 |
// the same. Rewrite to remove the AND. |
| 1501 |
unsigned Opc = N->getMachineOpcode(); |
1501 |
unsigned Opc = N->getMachineOpcode(); |
| 1502 |
if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr || |
1502 |
if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr || |
| 1503 |
Opc == X86::TEST32rr || Opc == X86::TEST64rr) && |
1503 |
Opc == X86::TEST32rr || Opc == X86::TEST64rr) && |
| 1504 |
N->getOperand(0) == N->getOperand(1) && |
1504 |
N->getOperand(0) == N->getOperand(1) && |
| 1505 |
N->getOperand(0)->hasNUsesOfValue(2, N->getOperand(0).getResNo()) && |
1505 |
N->getOperand(0)->hasNUsesOfValue(2, N->getOperand(0).getResNo()) && |
| 1506 |
N->getOperand(0).isMachineOpcode()) { |
1506 |
N->getOperand(0).isMachineOpcode()) { |
| 1507 |
SDValue And = N->getOperand(0); |
1507 |
SDValue And = N->getOperand(0); |
| 1508 |
unsigned N0Opc = And.getMachineOpcode(); |
1508 |
unsigned N0Opc = And.getMachineOpcode(); |
| 1509 |
if ((N0Opc == X86::AND8rr || N0Opc == X86::AND16rr || |
1509 |
if ((N0Opc == X86::AND8rr || N0Opc == X86::AND16rr || |
| 1510 |
N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) && |
1510 |
N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) && |
| 1511 |
!And->hasAnyUseOfValue(1)) { |
1511 |
!And->hasAnyUseOfValue(1)) { |
| 1512 |
MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N), |
1512 |
MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N), |
| 1513 |
MVT::i32, |
1513 |
MVT::i32, |
| 1514 |
And.getOperand(0), |
1514 |
And.getOperand(0), |
| 1515 |
And.getOperand(1)); |
1515 |
And.getOperand(1)); |
| 1516 |
ReplaceUses(N, Test); |
1516 |
ReplaceUses(N, Test); |
| 1517 |
MadeChange = true; |
1517 |
MadeChange = true; |
| 1518 |
continue; |
1518 |
continue; |
| 1519 |
} |
1519 |
} |
| 1520 |
if ((N0Opc == X86::AND8rm || N0Opc == X86::AND16rm || |
1520 |
if ((N0Opc == X86::AND8rm || N0Opc == X86::AND16rm || |
| 1521 |
N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) && |
1521 |
N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) && |
| 1522 |
!And->hasAnyUseOfValue(1)) { |
1522 |
!And->hasAnyUseOfValue(1)) { |
| 1523 |
unsigned NewOpc; |
1523 |
unsigned NewOpc; |
| 1524 |
switch (N0Opc) { |
1524 |
switch (N0Opc) { |
| 1525 |
case X86::AND8rm: NewOpc = X86::TEST8mr; break; |
1525 |
case X86::AND8rm: NewOpc = X86::TEST8mr; break; |
| 1526 |
case X86::AND16rm: NewOpc = X86::TEST16mr; break; |
1526 |
case X86::AND16rm: NewOpc = X86::TEST16mr; break; |
| 1527 |
case X86::AND32rm: NewOpc = X86::TEST32mr; break; |
1527 |
case X86::AND32rm: NewOpc = X86::TEST32mr; break; |
| 1528 |
case X86::AND64rm: NewOpc = X86::TEST64mr; break; |
1528 |
case X86::AND64rm: NewOpc = X86::TEST64mr; break; |
| 1529 |
} |
1529 |
} |
| 1530 |
|
1530 |
|
| 1531 |
// Need to swap the memory and register operand. |
1531 |
// Need to swap the memory and register operand. |
| 1532 |
SDValue Ops[] = { And.getOperand(1), |
1532 |
SDValue Ops[] = { And.getOperand(1), |
| 1533 |
And.getOperand(2), |
1533 |
And.getOperand(2), |
| 1534 |
And.getOperand(3), |
1534 |
And.getOperand(3), |
| 1535 |
And.getOperand(4), |
1535 |
And.getOperand(4), |
| 1536 |
And.getOperand(5), |
1536 |
And.getOperand(5), |
| 1537 |
And.getOperand(0), |
1537 |
And.getOperand(0), |
| 1538 |
And.getOperand(6) /* Chain */ }; |
1538 |
And.getOperand(6) /* Chain */ }; |
| 1539 |
MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N), |
1539 |
MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N), |
| 1540 |
MVT::i32, MVT::Other, Ops); |
1540 |
MVT::i32, MVT::Other, Ops); |
| 1541 |
CurDAG->setNodeMemRefs( |
1541 |
CurDAG->setNodeMemRefs( |
| 1542 |
Test, cast(And.getNode())->memoperands()); |
1542 |
Test, cast(And.getNode())->memoperands()); |
| 1543 |
ReplaceUses(And.getValue(2), SDValue(Test, 1)); |
1543 |
ReplaceUses(And.getValue(2), SDValue(Test, 1)); |
| 1544 |
ReplaceUses(SDValue(N, 0), SDValue(Test, 0)); |
1544 |
ReplaceUses(SDValue(N, 0), SDValue(Test, 0)); |
| 1545 |
MadeChange = true; |
1545 |
MadeChange = true; |
| 1546 |
continue; |
1546 |
continue; |
| 1547 |
} |
1547 |
} |
| 1548 |
} |
1548 |
} |
| 1549 |
|
1549 |
|
| 1550 |
// Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is |
1550 |
// Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is |
| 1551 |
// used. We're doing this late so we can prefer to fold the AND into masked |
1551 |
// used. We're doing this late so we can prefer to fold the AND into masked |
| 1552 |
// comparisons. Doing that can be better for the live range of the mask |
1552 |
// comparisons. Doing that can be better for the live range of the mask |
| 1553 |
// register. |
1553 |
// register. |
| 1554 |
if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr || |
1554 |
if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr || |
| 1555 |
Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) && |
1555 |
Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) && |
| 1556 |
N->getOperand(0) == N->getOperand(1) && |
1556 |
N->getOperand(0) == N->getOperand(1) && |
| 1557 |
N->isOnlyUserOf(N->getOperand(0).getNode()) && |
1557 |
N->isOnlyUserOf(N->getOperand(0).getNode()) && |
| 1558 |
N->getOperand(0).isMachineOpcode() && |
1558 |
N->getOperand(0).isMachineOpcode() && |
| 1559 |
onlyUsesZeroFlag(SDValue(N, 0))) { |
1559 |
onlyUsesZeroFlag(SDValue(N, 0))) { |
| 1560 |
SDValue And = N->getOperand(0); |
1560 |
SDValue And = N->getOperand(0); |
| 1561 |
unsigned N0Opc = And.getMachineOpcode(); |
1561 |
unsigned N0Opc = And.getMachineOpcode(); |
| 1562 |
// KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other |
1562 |
// KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other |
| 1563 |
// KAND instructions and KTEST use the same ISA feature. |
1563 |
// KAND instructions and KTEST use the same ISA feature. |
| 1564 |
if (N0Opc == X86::KANDBrr || |
1564 |
if (N0Opc == X86::KANDBrr || |
| 1565 |
(N0Opc == X86::KANDWrr && Subtarget->hasDQI()) || |
1565 |
(N0Opc == X86::KANDWrr && Subtarget->hasDQI()) || |
| 1566 |
N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) { |
1566 |
N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) { |
| 1567 |
unsigned NewOpc; |
1567 |
unsigned NewOpc; |
| 1568 |
switch (Opc) { |
1568 |
switch (Opc) { |
| 1569 |
default: llvm_unreachable("Unexpected opcode!"); |
1569 |
default: llvm_unreachable("Unexpected opcode!"); |
| 1570 |
case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break; |
1570 |
case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break; |
| 1571 |
case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break; |
1571 |
case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break; |
| 1572 |
case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break; |
1572 |
case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break; |
| 1573 |
case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break; |
1573 |
case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break; |
| 1574 |
} |
1574 |
} |
| 1575 |
MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N), |
1575 |
MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N), |
| 1576 |
MVT::i32, |
1576 |
MVT::i32, |
| 1577 |
And.getOperand(0), |
1577 |
And.getOperand(0), |
| 1578 |
And.getOperand(1)); |
1578 |
And.getOperand(1)); |
| 1579 |
ReplaceUses(N, KTest); |
1579 |
ReplaceUses(N, KTest); |
| 1580 |
MadeChange = true; |
1580 |
MadeChange = true; |
| 1581 |
continue; |
1581 |
continue; |
| 1582 |
} |
1582 |
} |
| 1583 |
} |
1583 |
} |
| 1584 |
|
1584 |
|
| 1585 |
// Attempt to remove vectors moves that were inserted to zero upper bits. |
1585 |
// Attempt to remove vectors moves that were inserted to zero upper bits. |
| 1586 |
if (Opc != TargetOpcode::SUBREG_TO_REG) |
1586 |
if (Opc != TargetOpcode::SUBREG_TO_REG) |
| 1587 |
continue; |
1587 |
continue; |
| 1588 |
|
1588 |
|
| 1589 |
unsigned SubRegIdx = N->getConstantOperandVal(2); |
1589 |
unsigned SubRegIdx = N->getConstantOperandVal(2); |
| 1590 |
if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm) |
1590 |
if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm) |
| 1591 |
continue; |
1591 |
continue; |
| 1592 |
|
1592 |
|
| 1593 |
SDValue Move = N->getOperand(1); |
1593 |
SDValue Move = N->getOperand(1); |
| 1594 |
if (!Move.isMachineOpcode()) |
1594 |
if (!Move.isMachineOpcode()) |
| 1595 |
continue; |
1595 |
continue; |
| 1596 |
|
1596 |
|
| 1597 |
// Make sure its one of the move opcodes we recognize. |
1597 |
// Make sure its one of the move opcodes we recognize. |
| 1598 |
switch (Move.getMachineOpcode()) { |
1598 |
switch (Move.getMachineOpcode()) { |
| 1599 |
default: |
1599 |
default: |
| 1600 |
continue; |
1600 |
continue; |
| 1601 |
case X86::VMOVAPDrr: case X86::VMOVUPDrr: |
1601 |
case X86::VMOVAPDrr: case X86::VMOVUPDrr: |
| 1602 |
case X86::VMOVAPSrr: case X86::VMOVUPSrr: |
1602 |
case X86::VMOVAPSrr: case X86::VMOVUPSrr: |
| 1603 |
case X86::VMOVDQArr: case X86::VMOVDQUrr: |
1603 |
case X86::VMOVDQArr: case X86::VMOVDQUrr: |
| 1604 |
case X86::VMOVAPDYrr: case X86::VMOVUPDYrr: |
1604 |
case X86::VMOVAPDYrr: case X86::VMOVUPDYrr: |
| 1605 |
case X86::VMOVAPSYrr: case X86::VMOVUPSYrr: |
1605 |
case X86::VMOVAPSYrr: case X86::VMOVUPSYrr: |
| 1606 |
case X86::VMOVDQAYrr: case X86::VMOVDQUYrr: |
1606 |
case X86::VMOVDQAYrr: case X86::VMOVDQUYrr: |
| 1607 |
case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr: |
1607 |
case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr: |
| 1608 |
case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr: |
1608 |
case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr: |
| 1609 |
case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr: |
1609 |
case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr: |
| 1610 |
case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr: |
1610 |
case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr: |
| 1611 |
case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr: |
1611 |
case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr: |
| 1612 |
case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr: |
1612 |
case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr: |
| 1613 |
case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr: |
1613 |
case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr: |
| 1614 |
case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr: |
1614 |
case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr: |
| 1615 |
break; |
1615 |
break; |
| 1616 |
} |
1616 |
} |
| 1617 |
|
1617 |
|
| 1618 |
SDValue In = Move.getOperand(0); |
1618 |
SDValue In = Move.getOperand(0); |
| 1619 |
if (!In.isMachineOpcode() || |
1619 |
if (!In.isMachineOpcode() || |
| 1620 |
In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END) |
1620 |
In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END) |
| 1621 |
continue; |
1621 |
continue; |
| 1622 |
|
1622 |
|
| 1623 |
// Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers |
1623 |
// Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers |
| 1624 |
// the SHA instructions which use a legacy encoding. |
1624 |
// the SHA instructions which use a legacy encoding. |
| 1625 |
uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags; |
1625 |
uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags; |
| 1626 |
if ((TSFlags & X86II::EncodingMask) != X86II::VEX && |
1626 |
if ((TSFlags & X86II::EncodingMask) != X86II::VEX && |
| 1627 |
(TSFlags & X86II::EncodingMask) != X86II::EVEX && |
1627 |
(TSFlags & X86II::EncodingMask) != X86II::EVEX && |
| 1628 |
(TSFlags & X86II::EncodingMask) != X86II::XOP) |
1628 |
(TSFlags & X86II::EncodingMask) != X86II::XOP) |
| 1629 |
continue; |
1629 |
continue; |
| 1630 |
|
1630 |
|
| 1631 |
// Producing instruction is another vector instruction. We can drop the |
1631 |
// Producing instruction is another vector instruction. We can drop the |
| 1632 |
// move. |
1632 |
// move. |
| 1633 |
CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2)); |
1633 |
CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2)); |
| 1634 |
MadeChange = true; |
1634 |
MadeChange = true; |
| 1635 |
} |
1635 |
} |
| 1636 |
|
1636 |
|
| 1637 |
if (MadeChange) |
1637 |
if (MadeChange) |
| 1638 |
CurDAG->RemoveDeadNodes(); |
1638 |
CurDAG->RemoveDeadNodes(); |
| 1639 |
} |
1639 |
} |
| 1640 |
|
1640 |
|
| 1641 |
|
1641 |
|
| 1642 |
/// Emit any code that needs to be executed only in the main function. |
1642 |
/// Emit any code that needs to be executed only in the main function. |
| 1643 |
void X86DAGToDAGISel::emitSpecialCodeForMain() { |
1643 |
void X86DAGToDAGISel::emitSpecialCodeForMain() { |
| 1644 |
if (Subtarget->isTargetCygMing()) { |
1644 |
if (Subtarget->isTargetCygMing()) { |
| 1645 |
TargetLowering::ArgListTy Args; |
1645 |
TargetLowering::ArgListTy Args; |
| 1646 |
auto &DL = CurDAG->getDataLayout(); |
1646 |
auto &DL = CurDAG->getDataLayout(); |
| 1647 |
|
1647 |
|
| 1648 |
TargetLowering::CallLoweringInfo CLI(*CurDAG); |
1648 |
TargetLowering::CallLoweringInfo CLI(*CurDAG); |
| 1649 |
CLI.setChain(CurDAG->getRoot()) |
1649 |
CLI.setChain(CurDAG->getRoot()) |
| 1650 |
.setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()), |
1650 |
.setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()), |
| 1651 |
CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)), |
1651 |
CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)), |
| 1652 |
std::move(Args)); |
1652 |
std::move(Args)); |
| 1653 |
const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); |
1653 |
const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); |
| 1654 |
std::pair Result = TLI.LowerCallTo(CLI); |
1654 |
std::pair Result = TLI.LowerCallTo(CLI); |
| 1655 |
CurDAG->setRoot(Result.second); |
1655 |
CurDAG->setRoot(Result.second); |
| 1656 |
} |
1656 |
} |
| 1657 |
} |
1657 |
} |
| 1658 |
|
1658 |
|
| 1659 |
void X86DAGToDAGISel::emitFunctionEntryCode() { |
1659 |
void X86DAGToDAGISel::emitFunctionEntryCode() { |
| 1660 |
// If this is main, emit special code for main. |
1660 |
// If this is main, emit special code for main. |
| 1661 |
const Function &F = MF->getFunction(); |
1661 |
const Function &F = MF->getFunction(); |
| 1662 |
if (F.hasExternalLinkage() && F.getName() == "main") |
1662 |
if (F.hasExternalLinkage() && F.getName() == "main") |
| 1663 |
emitSpecialCodeForMain(); |
1663 |
emitSpecialCodeForMain(); |
| 1664 |
} |
1664 |
} |
| 1665 |
|
1665 |
|
| 1666 |
static bool isDispSafeForFrameIndex(int64_t Val) { |
1666 |
static bool isDispSafeForFrameIndex(int64_t Val) { |
| 1667 |
// On 64-bit platforms, we can run into an issue where a frame index |
1667 |
// On 64-bit platforms, we can run into an issue where a frame index |
| 1668 |
// includes a displacement that, when added to the explicit displacement, |
1668 |
// includes a displacement that, when added to the explicit displacement, |
| 1669 |
// will overflow the displacement field. Assuming that the frame index |
1669 |
// will overflow the displacement field. Assuming that the frame index |
| 1670 |
// displacement fits into a 31-bit integer (which is only slightly more |
1670 |
// displacement fits into a 31-bit integer (which is only slightly more |
| 1671 |
// aggressive than the current fundamental assumption that it fits into |
1671 |
// aggressive than the current fundamental assumption that it fits into |
| 1672 |
// a 32-bit integer), a 31-bit disp should always be safe. |
1672 |
// a 32-bit integer), a 31-bit disp should always be safe. |
| 1673 |
return isInt<31>(Val); |
1673 |
return isInt<31>(Val); |
| 1674 |
} |
1674 |
} |
| 1675 |
|
1675 |
|
| 1676 |
bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset, |
1676 |
bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset, |
| 1677 |
X86ISelAddressMode &AM) { |
1677 |
X86ISelAddressMode &AM) { |
| 1678 |
// We may have already matched a displacement and the caller just added the |
1678 |
// We may have already matched a displacement and the caller just added the |
| 1679 |
// symbolic displacement. So we still need to do the checks even if Offset |
1679 |
// symbolic displacement. So we still need to do the checks even if Offset |
| 1680 |
// is zero. |
1680 |
// is zero. |
| 1681 |
|
1681 |
|
| 1682 |
int64_t Val = AM.Disp + Offset; |
1682 |
int64_t Val = AM.Disp + Offset; |
| 1683 |
|
1683 |
|
| 1684 |
// Cannot combine ExternalSymbol displacements with integer offsets. |
1684 |
// Cannot combine ExternalSymbol displacements with integer offsets. |
| 1685 |
if (Val != 0 && (AM.ES || AM.MCSym)) |
1685 |
if (Val != 0 && (AM.ES || AM.MCSym)) |
| 1686 |
return true; |
1686 |
return true; |
| 1687 |
|
1687 |
|
| 1688 |
CodeModel::Model M = TM.getCodeModel(); |
1688 |
CodeModel::Model M = TM.getCodeModel(); |
| 1689 |
if (Subtarget->is64Bit()) { |
1689 |
if (Subtarget->is64Bit()) { |
| 1690 |
if (Val != 0 && |
1690 |
if (Val != 0 && |
| 1691 |
!X86::isOffsetSuitableForCodeModel(Val, M, |
1691 |
!X86::isOffsetSuitableForCodeModel(Val, M, |
| 1692 |
AM.hasSymbolicDisplacement())) |
1692 |
AM.hasSymbolicDisplacement())) |
| 1693 |
return true; |
1693 |
return true; |
| 1694 |
// In addition to the checks required for a register base, check that |
1694 |
// In addition to the checks required for a register base, check that |
| 1695 |
// we do not try to use an unsafe Disp with a frame index. |
1695 |
// we do not try to use an unsafe Disp with a frame index. |
| 1696 |
if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && |
1696 |
if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && |
| 1697 |
!isDispSafeForFrameIndex(Val)) |
1697 |
!isDispSafeForFrameIndex(Val)) |
| 1698 |
return true; |
1698 |
return true; |
| 1699 |
} |
1699 |
} |
| 1700 |
AM.Disp = Val; |
1700 |
AM.Disp = Val; |
| 1701 |
return false; |
1701 |
return false; |
| 1702 |
|
1702 |
|
| 1703 |
} |
1703 |
} |
| 1704 |
|
1704 |
|
| 1705 |
bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, |
1705 |
bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, |
| 1706 |
bool AllowSegmentRegForX32) { |
1706 |
bool AllowSegmentRegForX32) { |
| 1707 |
SDValue Address = N->getOperand(1); |
1707 |
SDValue Address = N->getOperand(1); |
| 1708 |
|
1708 |
|
| 1709 |
// load gs:0 -> GS segment register. |
1709 |
// load gs:0 -> GS segment register. |
| 1710 |
// load fs:0 -> FS segment register. |
1710 |
// load fs:0 -> FS segment register. |
| 1711 |
// |
1711 |
// |
| 1712 |
// This optimization is generally valid because the GNU TLS model defines that |
1712 |
// This optimization is generally valid because the GNU TLS model defines that |
| 1713 |
// gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode |
1713 |
// gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode |
| 1714 |
// with 32-bit registers, as we get in ILP32 mode, those registers are first |
1714 |
// with 32-bit registers, as we get in ILP32 mode, those registers are first |
| 1715 |
// zero-extended to 64 bits and then added it to the base address, which gives |
1715 |
// zero-extended to 64 bits and then added it to the base address, which gives |
| 1716 |
// unwanted results when the register holds a negative value. |
1716 |
// unwanted results when the register holds a negative value. |
| 1717 |
// For more information see http://people.redhat.com/drepper/tls.pdf |
1717 |
// For more information see http://people.redhat.com/drepper/tls.pdf |
| 1718 |
if (isNullConstant(Address) && AM.Segment.getNode() == nullptr && |
1718 |
if (isNullConstant(Address) && AM.Segment.getNode() == nullptr && |
| 1719 |
!IndirectTlsSegRefs && |
1719 |
!IndirectTlsSegRefs && |
| 1720 |
(Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() || |
1720 |
(Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() || |
| 1721 |
Subtarget->isTargetFuchsia())) { |
1721 |
Subtarget->isTargetFuchsia())) { |
| 1722 |
if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32) |
1722 |
if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32) |
| 1723 |
return true; |
1723 |
return true; |
| 1724 |
switch (N->getPointerInfo().getAddrSpace()) { |
1724 |
switch (N->getPointerInfo().getAddrSpace()) { |
| 1725 |
case X86AS::GS: |
1725 |
case X86AS::GS: |
| 1726 |
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); |
1726 |
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); |
| 1727 |
return false; |
1727 |
return false; |
| 1728 |
case X86AS::FS: |
1728 |
case X86AS::FS: |
| 1729 |
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); |
1729 |
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); |
| 1730 |
return false; |
1730 |
return false; |
| 1731 |
// Address space X86AS::SS is not handled here, because it is not used to |
1731 |
// Address space X86AS::SS is not handled here, because it is not used to |
| 1732 |
// address TLS areas. |
1732 |
// address TLS areas. |
| 1733 |
} |
1733 |
} |
| 1734 |
} |
1734 |
} |
| 1735 |
|
1735 |
|
| 1736 |
return true; |
1736 |
return true; |
| 1737 |
} |
1737 |
} |
| 1738 |
|
1738 |
|
| 1739 |
/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing |
1739 |
/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing |
| 1740 |
/// mode. These wrap things that will resolve down into a symbol reference. |
1740 |
/// mode. These wrap things that will resolve down into a symbol reference. |
| 1741 |
/// If no match is possible, this returns true, otherwise it returns false. |
1741 |
/// If no match is possible, this returns true, otherwise it returns false. |
| 1742 |
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { |
1742 |
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { |
| 1743 |
// If the addressing mode already has a symbol as the displacement, we can |
1743 |
// If the addressing mode already has a symbol as the displacement, we can |
| 1744 |
// never match another symbol. |
1744 |
// never match another symbol. |
| 1745 |
if (AM.hasSymbolicDisplacement()) |
1745 |
if (AM.hasSymbolicDisplacement()) |
| 1746 |
return true; |
1746 |
return true; |
| 1747 |
|
1747 |
|
| 1748 |
bool IsRIPRelTLS = false; |
1748 |
bool IsRIPRelTLS = false; |
| 1749 |
bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP; |
1749 |
bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP; |
| 1750 |
if (IsRIPRel) { |
1750 |
if (IsRIPRel) { |
| 1751 |
SDValue Val = N.getOperand(0); |
1751 |
SDValue Val = N.getOperand(0); |
| 1752 |
if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) |
1752 |
if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) |
| 1753 |
IsRIPRelTLS = true; |
1753 |
IsRIPRelTLS = true; |
| 1754 |
} |
1754 |
} |
| 1755 |
|
1755 |
|
| 1756 |
// We can't use an addressing mode in the 64-bit large code model. |
1756 |
// We can't use an addressing mode in the 64-bit large code model. |
| 1757 |
// Global TLS addressing is an exception. In the medium code model, |
1757 |
// Global TLS addressing is an exception. In the medium code model, |
| 1758 |
// we use can use a mode when RIP wrappers are present. |
1758 |
// we use can use a mode when RIP wrappers are present. |
| 1759 |
// That signifies access to globals that are known to be "near", |
1759 |
// That signifies access to globals that are known to be "near", |
| 1760 |
// such as the GOT itself. |
1760 |
// such as the GOT itself. |
| 1761 |
CodeModel::Model M = TM.getCodeModel(); |
1761 |
CodeModel::Model M = TM.getCodeModel(); |
| 1762 |
if (Subtarget->is64Bit() && |
1762 |
if (Subtarget->is64Bit() && |
| 1763 |
((M == CodeModel::Large && !IsRIPRelTLS) || |
1763 |
((M == CodeModel::Large && !IsRIPRelTLS) || |
| 1764 |
(M == CodeModel::Medium && !IsRIPRel))) |
1764 |
(M == CodeModel::Medium && !IsRIPRel))) |
| 1765 |
return true; |
1765 |
return true; |
| 1766 |
|
1766 |
|
| 1767 |
// Base and index reg must be 0 in order to use %rip as base. |
1767 |
// Base and index reg must be 0 in order to use %rip as base. |
| 1768 |
if (IsRIPRel && AM.hasBaseOrIndexReg()) |
1768 |
if (IsRIPRel && AM.hasBaseOrIndexReg()) |
| 1769 |
return true; |
1769 |
return true; |
| 1770 |
|
1770 |
|
| 1771 |
// Make a local copy in case we can't do this fold. |
1771 |
// Make a local copy in case we can't do this fold. |
| 1772 |
X86ISelAddressMode Backup = AM; |
1772 |
X86ISelAddressMode Backup = AM; |
| 1773 |
|
1773 |
|
| 1774 |
int64_t Offset = 0; |
1774 |
int64_t Offset = 0; |
| 1775 |
SDValue N0 = N.getOperand(0); |
1775 |
SDValue N0 = N.getOperand(0); |
| 1776 |
if (auto *G = dyn_cast(N0)) { |
1776 |
if (auto *G = dyn_cast(N0)) { |
| 1777 |
AM.GV = G->getGlobal(); |
1777 |
AM.GV = G->getGlobal(); |
| 1778 |
AM.SymbolFlags = G->getTargetFlags(); |
1778 |
AM.SymbolFlags = G->getTargetFlags(); |
| 1779 |
Offset = G->getOffset(); |
1779 |
Offset = G->getOffset(); |
| 1780 |
} else if (auto *CP = dyn_cast(N0)) { |
1780 |
} else if (auto *CP = dyn_cast(N0)) { |
| 1781 |
AM.CP = CP->getConstVal(); |
1781 |
AM.CP = CP->getConstVal(); |
| 1782 |
AM.Alignment = CP->getAlign(); |
1782 |
AM.Alignment = CP->getAlign(); |
| 1783 |
AM.SymbolFlags = CP->getTargetFlags(); |
1783 |
AM.SymbolFlags = CP->getTargetFlags(); |
| 1784 |
Offset = CP->getOffset(); |
1784 |
Offset = CP->getOffset(); |
| 1785 |
} else if (auto *S = dyn_cast(N0)) { |
1785 |
} else if (auto *S = dyn_cast(N0)) { |
| 1786 |
AM.ES = S->getSymbol(); |
1786 |
AM.ES = S->getSymbol(); |
| 1787 |
AM.SymbolFlags = S->getTargetFlags(); |
1787 |
AM.SymbolFlags = S->getTargetFlags(); |
| 1788 |
} else if (auto *S = dyn_cast(N0)) { |
1788 |
} else if (auto *S = dyn_cast(N0)) { |
| 1789 |
AM.MCSym = S->getMCSymbol(); |
1789 |
AM.MCSym = S->getMCSymbol(); |
| 1790 |
} else if (auto *J = dyn_cast(N0)) { |
1790 |
} else if (auto *J = dyn_cast(N0)) { |
| 1791 |
AM.JT = J->getIndex(); |
1791 |
AM.JT = J->getIndex(); |
| 1792 |
AM.SymbolFlags = J->getTargetFlags(); |
1792 |
AM.SymbolFlags = J->getTargetFlags(); |
| 1793 |
} else if (auto *BA = dyn_cast(N0)) { |
1793 |
} else if (auto *BA = dyn_cast(N0)) { |
| 1794 |
AM.BlockAddr = BA->getBlockAddress(); |
1794 |
AM.BlockAddr = BA->getBlockAddress(); |
| 1795 |
AM.SymbolFlags = BA->getTargetFlags(); |
1795 |
AM.SymbolFlags = BA->getTargetFlags(); |
| 1796 |
Offset = BA->getOffset(); |
1796 |
Offset = BA->getOffset(); |
| 1797 |
} else |
1797 |
} else |
| 1798 |
llvm_unreachable("Unhandled symbol reference node."); |
1798 |
llvm_unreachable("Unhandled symbol reference node."); |
| 1799 |
|
1799 |
|
| 1800 |
if (foldOffsetIntoAddress(Offset, AM)) { |
1800 |
if (foldOffsetIntoAddress(Offset, AM)) { |
| 1801 |
AM = Backup; |
1801 |
AM = Backup; |
| 1802 |
return true; |
1802 |
return true; |
| 1803 |
} |
1803 |
} |
| 1804 |
|
1804 |
|
| 1805 |
if (IsRIPRel) |
1805 |
if (IsRIPRel) |
| 1806 |
AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); |
1806 |
AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); |
| 1807 |
|
1807 |
|
| 1808 |
// Commit the changes now that we know this fold is safe. |
1808 |
// Commit the changes now that we know this fold is safe. |
| 1809 |
return false; |
1809 |
return false; |
| 1810 |
} |
1810 |
} |
| 1811 |
|
1811 |
|
| 1812 |
/// Add the specified node to the specified addressing mode, returning true if |
1812 |
/// Add the specified node to the specified addressing mode, returning true if |
| 1813 |
/// it cannot be done. This just pattern matches for the addressing mode. |
1813 |
/// it cannot be done. This just pattern matches for the addressing mode. |
| 1814 |
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { |
1814 |
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { |
| 1815 |
if (matchAddressRecursively(N, AM, 0)) |
1815 |
if (matchAddressRecursively(N, AM, 0)) |
| 1816 |
return true; |
1816 |
return true; |
| 1817 |
|
1817 |
|
| 1818 |
// Post-processing: Make a second attempt to fold a load, if we now know |
1818 |
// Post-processing: Make a second attempt to fold a load, if we now know |
| 1819 |
// that there will not be any other register. This is only performed for |
1819 |
// that there will not be any other register. This is only performed for |
| 1820 |
// 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded |
1820 |
// 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded |
| 1821 |
// any foldable load the first time. |
1821 |
// any foldable load the first time. |
| 1822 |
if (Subtarget->isTarget64BitILP32() && |
1822 |
if (Subtarget->isTarget64BitILP32() && |
| 1823 |
AM.BaseType == X86ISelAddressMode::RegBase && |
1823 |
AM.BaseType == X86ISelAddressMode::RegBase && |
| 1824 |
AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) { |
1824 |
AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) { |
| 1825 |
SDValue Save_Base_Reg = AM.Base_Reg; |
1825 |
SDValue Save_Base_Reg = AM.Base_Reg; |
| 1826 |
if (auto *LoadN = dyn_cast(Save_Base_Reg)) { |
1826 |
if (auto *LoadN = dyn_cast(Save_Base_Reg)) { |
| 1827 |
AM.Base_Reg = SDValue(); |
1827 |
AM.Base_Reg = SDValue(); |
| 1828 |
if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true)) |
1828 |
if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true)) |
| 1829 |
AM.Base_Reg = Save_Base_Reg; |
1829 |
AM.Base_Reg = Save_Base_Reg; |
| 1830 |
} |
1830 |
} |
| 1831 |
} |
1831 |
} |
| 1832 |
|
1832 |
|
| 1833 |
// Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has |
1833 |
// Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has |
| 1834 |
// a smaller encoding and avoids a scaled-index. |
1834 |
// a smaller encoding and avoids a scaled-index. |
| 1835 |
if (AM.Scale == 2 && |
1835 |
if (AM.Scale == 2 && |
| 1836 |
AM.BaseType == X86ISelAddressMode::RegBase && |
1836 |
AM.BaseType == X86ISelAddressMode::RegBase && |
| 1837 |
AM.Base_Reg.getNode() == nullptr) { |
1837 |
AM.Base_Reg.getNode() == nullptr) { |
| 1838 |
AM.Base_Reg = AM.IndexReg; |
1838 |
AM.Base_Reg = AM.IndexReg; |
| 1839 |
AM.Scale = 1; |
1839 |
AM.Scale = 1; |
| 1840 |
} |
1840 |
} |
| 1841 |
|
1841 |
|
| 1842 |
// Post-processing: Convert foo to foo(%rip), even in non-PIC mode, |
1842 |
// Post-processing: Convert foo to foo(%rip), even in non-PIC mode, |
| 1843 |
// because it has a smaller encoding. |
1843 |
// because it has a smaller encoding. |
| 1844 |
// TODO: Which other code models can use this? |
1844 |
// TODO: Which other code models can use this? |
| 1845 |
switch (TM.getCodeModel()) { |
1845 |
switch (TM.getCodeModel()) { |
| 1846 |
default: break; |
1846 |
default: break; |
| 1847 |
case CodeModel::Small: |
1847 |
case CodeModel::Small: |
| 1848 |
case CodeModel::Kernel: |
1848 |
case CodeModel::Kernel: |
| 1849 |
if (Subtarget->is64Bit() && |
1849 |
if (Subtarget->is64Bit() && |
| 1850 |
AM.Scale == 1 && |
1850 |
AM.Scale == 1 && |
| 1851 |
AM.BaseType == X86ISelAddressMode::RegBase && |
1851 |
AM.BaseType == X86ISelAddressMode::RegBase && |
| 1852 |
AM.Base_Reg.getNode() == nullptr && |
1852 |
AM.Base_Reg.getNode() == nullptr && |
| 1853 |
AM.IndexReg.getNode() == nullptr && |
1853 |
AM.IndexReg.getNode() == nullptr && |
| 1854 |
AM.SymbolFlags == X86II::MO_NO_FLAG && |
1854 |
AM.SymbolFlags == X86II::MO_NO_FLAG && |
| 1855 |
AM.hasSymbolicDisplacement()) |
1855 |
AM.hasSymbolicDisplacement()) |
| 1856 |
AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); |
1856 |
AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); |
| 1857 |
break; |
1857 |
break; |
| 1858 |
} |
1858 |
} |
| 1859 |
|
1859 |
|
| 1860 |
return false; |
1860 |
return false; |
| 1861 |
} |
1861 |
} |
| 1862 |
|
1862 |
|
| 1863 |
bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM, |
1863 |
bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM, |
| 1864 |
unsigned Depth) { |
1864 |
unsigned Depth) { |
| 1865 |
// Add an artificial use to this node so that we can keep track of |
1865 |
// Add an artificial use to this node so that we can keep track of |
| 1866 |
// it if it gets CSE'd with a different node. |
1866 |
// it if it gets CSE'd with a different node. |
| 1867 |
HandleSDNode Handle(N); |
1867 |
HandleSDNode Handle(N); |
| 1868 |
|
1868 |
|
| 1869 |
X86ISelAddressMode Backup = AM; |
1869 |
X86ISelAddressMode Backup = AM; |
| 1870 |
if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) && |
1870 |
if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) && |
| 1871 |
!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) |
1871 |
!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) |
| 1872 |
return false; |
1872 |
return false; |
| 1873 |
AM = Backup; |
1873 |
AM = Backup; |
| 1874 |
|
1874 |
|
| 1875 |
// Try again after commutating the operands. |
1875 |
// Try again after commutating the operands. |
| 1876 |
if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, |
1876 |
if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, |
| 1877 |
Depth + 1) && |
1877 |
Depth + 1) && |
| 1878 |
!matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1)) |
1878 |
!matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1)) |
| 1879 |
return false; |
1879 |
return false; |
| 1880 |
AM = Backup; |
1880 |
AM = Backup; |
| 1881 |
|
1881 |
|
| 1882 |
// If we couldn't fold both operands into the address at the same time, |
1882 |
// If we couldn't fold both operands into the address at the same time, |
| 1883 |
// see if we can just put each operand into a register and fold at least |
1883 |
// see if we can just put each operand into a register and fold at least |
| 1884 |
// the add. |
1884 |
// the add. |
| 1885 |
if (AM.BaseType == X86ISelAddressMode::RegBase && |
1885 |
if (AM.BaseType == X86ISelAddressMode::RegBase && |
| 1886 |
!AM.Base_Reg.getNode() && |
1886 |
!AM.Base_Reg.getNode() && |
| 1887 |
!AM.IndexReg.getNode()) { |
1887 |
!AM.IndexReg.getNode()) { |
| 1888 |
N = Handle.getValue(); |
1888 |
N = Handle.getValue(); |
| 1889 |
AM.Base_Reg = N.getOperand(0); |
1889 |
AM.Base_Reg = N.getOperand(0); |
| 1890 |
AM.IndexReg = N.getOperand(1); |
1890 |
AM.IndexReg = N.getOperand(1); |
| 1891 |
AM.Scale = 1; |
1891 |
AM.Scale = 1; |
| 1892 |
return false; |
1892 |
return false; |
| 1893 |
} |
1893 |
} |
| 1894 |
N = Handle.getValue(); |
1894 |
N = Handle.getValue(); |
| 1895 |
return true; |
1895 |
return true; |
| 1896 |
} |
1896 |
} |
| 1897 |
|
1897 |
|
| 1898 |
// Insert a node into the DAG at least before the Pos node's position. This |
1898 |
// Insert a node into the DAG at least before the Pos node's position. This |
| 1899 |
// will reposition the node as needed, and will assign it a node ID that is <= |
1899 |
// will reposition the node as needed, and will assign it a node ID that is <= |
| 1900 |
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node |
1900 |
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node |
| 1901 |
// IDs! The selection DAG must no longer depend on their uniqueness when this |
1901 |
// IDs! The selection DAG must no longer depend on their uniqueness when this |
| 1902 |
// is used. |
1902 |
// is used. |
| 1903 |
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { |
1903 |
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { |
| 1904 |
if (N->getNodeId() == -1 || |
1904 |
if (N->getNodeId() == -1 || |
| 1905 |
(SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > |
1905 |
(SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > |
| 1906 |
SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) { |
1906 |
SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) { |
| 1907 |
DAG.RepositionNode(Pos->getIterator(), N.getNode()); |
1907 |
DAG.RepositionNode(Pos->getIterator(), N.getNode()); |
| 1908 |
// Mark Node as invalid for pruning as after this it may be a successor to a |
1908 |
// Mark Node as invalid for pruning as after this it may be a successor to a |
| 1909 |
// selected node but otherwise be in the same position of Pos. |
1909 |
// selected node but otherwise be in the same position of Pos. |
| 1910 |
// Conservatively mark it with the same -abs(Id) to assure node id |
1910 |
// Conservatively mark it with the same -abs(Id) to assure node id |
| 1911 |
// invariant is preserved. |
1911 |
// invariant is preserved. |
| 1912 |
N->setNodeId(Pos->getNodeId()); |
1912 |
N->setNodeId(Pos->getNodeId()); |
| 1913 |
SelectionDAGISel::InvalidateNodeId(N.getNode()); |
1913 |
SelectionDAGISel::InvalidateNodeId(N.getNode()); |
| 1914 |
} |
1914 |
} |
| 1915 |
} |
1915 |
} |
| 1916 |
|
1916 |
|
| 1917 |
// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if |
1917 |
// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if |
| 1918 |
// safe. This allows us to convert the shift and and into an h-register |
1918 |
// safe. This allows us to convert the shift and and into an h-register |
| 1919 |
// extract and a scaled index. Returns false if the simplification is |
1919 |
// extract and a scaled index. Returns false if the simplification is |
| 1920 |
// performed. |
1920 |
// performed. |
| 1921 |
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, |
1921 |
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, |
| 1922 |
uint64_t Mask, |
1922 |
uint64_t Mask, |
| 1923 |
SDValue Shift, SDValue X, |
1923 |
SDValue Shift, SDValue X, |
| 1924 |
X86ISelAddressMode &AM) { |
1924 |
X86ISelAddressMode &AM) { |
| 1925 |
if (Shift.getOpcode() != ISD::SRL || |
1925 |
if (Shift.getOpcode() != ISD::SRL || |
| 1926 |
!isa(Shift.getOperand(1)) || |
1926 |
!isa(Shift.getOperand(1)) || |
| 1927 |
!Shift.hasOneUse()) |
1927 |
!Shift.hasOneUse()) |
| 1928 |
return true; |
1928 |
return true; |
| 1929 |
|
1929 |
|
| 1930 |
int ScaleLog = 8 - Shift.getConstantOperandVal(1); |
1930 |
int ScaleLog = 8 - Shift.getConstantOperandVal(1); |
| 1931 |
if (ScaleLog <= 0 || ScaleLog >= 4 || |
1931 |
if (ScaleLog <= 0 || ScaleLog >= 4 || |
| 1932 |
Mask != (0xffu << ScaleLog)) |
1932 |
Mask != (0xffu << ScaleLog)) |
| 1933 |
return true; |
1933 |
return true; |
| 1934 |
|
1934 |
|
| 1935 |
MVT XVT = X.getSimpleValueType(); |
1935 |
MVT XVT = X.getSimpleValueType(); |
| 1936 |
MVT VT = N.getSimpleValueType(); |
1936 |
MVT VT = N.getSimpleValueType(); |
| 1937 |
SDLoc DL(N); |
1937 |
SDLoc DL(N); |
| 1938 |
SDValue Eight = DAG.getConstant(8, DL, MVT::i8); |
1938 |
SDValue Eight = DAG.getConstant(8, DL, MVT::i8); |
| 1939 |
SDValue NewMask = DAG.getConstant(0xff, DL, XVT); |
1939 |
SDValue NewMask = DAG.getConstant(0xff, DL, XVT); |
| 1940 |
SDValue Srl = DAG.getNode(ISD::SRL, DL, XVT, X, Eight); |
1940 |
SDValue Srl = DAG.getNode(ISD::SRL, DL, XVT, X, Eight); |
| 1941 |
SDValue And = DAG.getNode(ISD::AND, DL, XVT, Srl, NewMask); |
1941 |
SDValue And = DAG.getNode(ISD::AND, DL, XVT, Srl, NewMask); |
| 1942 |
SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8); |
1942 |
SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8); |
| 1943 |
SDValue Ext = DAG.getZExtOrTrunc(And, DL, VT); |
1943 |
SDValue Ext = DAG.getZExtOrTrunc(And, DL, VT); |
| 1944 |
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Ext, ShlCount); |
1944 |
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Ext, ShlCount); |
| 1945 |
|
1945 |
|
| 1946 |
// Insert the new nodes into the topological ordering. We must do this in |
1946 |
// Insert the new nodes into the topological ordering. We must do this in |
| 1947 |
// a valid topological ordering as nothing is going to go back and re-sort |
1947 |
// a valid topological ordering as nothing is going to go back and re-sort |
| 1948 |
// these nodes. We continually insert before 'N' in sequence as this is |
1948 |
// these nodes. We continually insert before 'N' in sequence as this is |
| 1949 |
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no |
1949 |
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no |
| 1950 |
// hierarchy left to express. |
1950 |
// hierarchy left to express. |
| 1951 |
insertDAGNode(DAG, N, Eight); |
1951 |
insertDAGNode(DAG, N, Eight); |
| 1952 |
insertDAGNode(DAG, N, Srl); |
1952 |
insertDAGNode(DAG, N, Srl); |
| 1953 |
insertDAGNode(DAG, N, NewMask); |
1953 |
insertDAGNode(DAG, N, NewMask); |
| 1954 |
insertDAGNode(DAG, N, And); |
1954 |
insertDAGNode(DAG, N, And); |
| 1955 |
insertDAGNode(DAG, N, ShlCount); |
1955 |
insertDAGNode(DAG, N, ShlCount); |
| 1956 |
if (Ext != And) |
1956 |
if (Ext != And) |
| 1957 |
insertDAGNode(DAG, N, Ext); |
1957 |
insertDAGNode(DAG, N, Ext); |
| 1958 |
insertDAGNode(DAG, N, Shl); |
1958 |
insertDAGNode(DAG, N, Shl); |
| 1959 |
DAG.ReplaceAllUsesWith(N, Shl); |
1959 |
DAG.ReplaceAllUsesWith(N, Shl); |
| 1960 |
DAG.RemoveDeadNode(N.getNode()); |
1960 |
DAG.RemoveDeadNode(N.getNode()); |
| 1961 |
AM.IndexReg = Ext; |
1961 |
AM.IndexReg = Ext; |
| 1962 |
AM.Scale = (1 << ScaleLog); |
1962 |
AM.Scale = (1 << ScaleLog); |
| 1963 |
return false; |
1963 |
return false; |
| 1964 |
} |
1964 |
} |
| 1965 |
|
1965 |
|
| 1966 |
// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this |
1966 |
// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this |
| 1967 |
// allows us to fold the shift into this addressing mode. Returns false if the |
1967 |
// allows us to fold the shift into this addressing mode. Returns false if the |
| 1968 |
// transform succeeded. |
1968 |
// transform succeeded. |
| 1969 |
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, |
1969 |
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, |
| 1970 |
X86ISelAddressMode &AM) { |
1970 |
X86ISelAddressMode &AM) { |
| 1971 |
SDValue Shift = N.getOperand(0); |
1971 |
SDValue Shift = N.getOperand(0); |
| 1972 |
|
1972 |
|
| 1973 |
// Use a signed mask so that shifting right will insert sign bits. These |
1973 |
// Use a signed mask so that shifting right will insert sign bits. These |
| 1974 |
// bits will be removed when we shift the result left so it doesn't matter |
1974 |
// bits will be removed when we shift the result left so it doesn't matter |
| 1975 |
// what we use. This might allow a smaller immediate encoding. |
1975 |
// what we use. This might allow a smaller immediate encoding. |
| 1976 |
int64_t Mask = cast(N->getOperand(1))->getSExtValue(); |
1976 |
int64_t Mask = cast(N->getOperand(1))->getSExtValue(); |
| 1977 |
|
1977 |
|
| 1978 |
// If we have an any_extend feeding the AND, look through it to see if there |
1978 |
// If we have an any_extend feeding the AND, look through it to see if there |
| 1979 |
// is a shift behind it. But only if the AND doesn't use the extended bits. |
1979 |
// is a shift behind it. But only if the AND doesn't use the extended bits. |
| 1980 |
// FIXME: Generalize this to other ANY_EXTEND than i32 to i64? |
1980 |
// FIXME: Generalize this to other ANY_EXTEND than i32 to i64? |
| 1981 |
bool FoundAnyExtend = false; |
1981 |
bool FoundAnyExtend = false; |
| 1982 |
if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && |
1982 |
if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && |
| 1983 |
Shift.getOperand(0).getSimpleValueType() == MVT::i32 && |
1983 |
Shift.getOperand(0).getSimpleValueType() == MVT::i32 && |
| 1984 |
isUInt<32>(Mask)) { |
1984 |
isUInt<32>(Mask)) { |
| 1985 |
FoundAnyExtend = true; |
1985 |
FoundAnyExtend = true; |
| 1986 |
Shift = Shift.getOperand(0); |
1986 |
Shift = Shift.getOperand(0); |
| 1987 |
} |
1987 |
} |
| 1988 |
|
1988 |
|
| 1989 |
if (Shift.getOpcode() != ISD::SHL || |
1989 |
if (Shift.getOpcode() != ISD::SHL || |
| 1990 |
!isa(Shift.getOperand(1))) |
1990 |
!isa(Shift.getOperand(1))) |
| 1991 |
return true; |
1991 |
return true; |
| 1992 |
|
1992 |
|
| 1993 |
SDValue X = Shift.getOperand(0); |
1993 |
SDValue X = Shift.getOperand(0); |
| 1994 |
|
1994 |
|
| 1995 |
// Not likely to be profitable if either the AND or SHIFT node has more |
1995 |
// Not likely to be profitable if either the AND or SHIFT node has more |
| 1996 |
// than one use (unless all uses are for address computation). Besides, |
1996 |
// than one use (unless all uses are for address computation). Besides, |
| 1997 |
// isel mechanism requires their node ids to be reused. |
1997 |
// isel mechanism requires their node ids to be reused. |
| 1998 |
if (!N.hasOneUse() || !Shift.hasOneUse()) |
1998 |
if (!N.hasOneUse() || !Shift.hasOneUse()) |
| 1999 |
return true; |
1999 |
return true; |
| 2000 |
|
2000 |
|
| 2001 |
// Verify that the shift amount is something we can fold. |
2001 |
// Verify that the shift amount is something we can fold. |
| 2002 |
unsigned ShiftAmt = Shift.getConstantOperandVal(1); |
2002 |
unsigned ShiftAmt = Shift.getConstantOperandVal(1); |
| 2003 |
if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3) |
2003 |
if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3) |
| 2004 |
return true; |
2004 |
return true; |
| 2005 |
|
2005 |
|
| 2006 |
MVT VT = N.getSimpleValueType(); |
2006 |
MVT VT = N.getSimpleValueType(); |
| 2007 |
SDLoc DL(N); |
2007 |
SDLoc DL(N); |
| 2008 |
if (FoundAnyExtend) { |
2008 |
if (FoundAnyExtend) { |
| 2009 |
SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X); |
2009 |
SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X); |
| 2010 |
insertDAGNode(DAG, N, NewX); |
2010 |
insertDAGNode(DAG, N, NewX); |
| 2011 |
X = NewX; |
2011 |
X = NewX; |
| 2012 |
} |
2012 |
} |
| 2013 |
|
2013 |
|
| 2014 |
SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT); |
2014 |
SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT); |
| 2015 |
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask); |
2015 |
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask); |
| 2016 |
SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1)); |
2016 |
SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1)); |
| 2017 |
|
2017 |
|
| 2018 |
// Insert the new nodes into the topological ordering. We must do this in |
2018 |
// Insert the new nodes into the topological ordering. We must do this in |
| 2019 |
// a valid topological ordering as nothing is going to go back and re-sort |
2019 |
// a valid topological ordering as nothing is going to go back and re-sort |
| 2020 |
// these nodes. We continually insert before 'N' in sequence as this is |
2020 |
// these nodes. We continually insert before 'N' in sequence as this is |
| 2021 |
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no |
2021 |
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no |
| 2022 |
// hierarchy left to express. |
2022 |
// hierarchy left to express. |
| 2023 |
insertDAGNode(DAG, N, NewMask); |
2023 |
insertDAGNode(DAG, N, NewMask); |
| 2024 |
insertDAGNode(DAG, N, NewAnd); |
2024 |
insertDAGNode(DAG, N, NewAnd); |
| 2025 |
insertDAGNode(DAG, N, NewShift); |
2025 |
insertDAGNode(DAG, N, NewShift); |
| 2026 |
DAG.ReplaceAllUsesWith(N, NewShift); |
2026 |
DAG.ReplaceAllUsesWith(N, NewShift); |
| 2027 |
DAG.RemoveDeadNode(N.getNode()); |
2027 |
DAG.RemoveDeadNode(N.getNode()); |
| 2028 |
|
2028 |
|
| 2029 |
AM.Scale = 1 << ShiftAmt; |
2029 |
AM.Scale = 1 << ShiftAmt; |
| 2030 |
AM.IndexReg = NewAnd; |
2030 |
AM.IndexReg = NewAnd; |
| 2031 |
return false; |
2031 |
return false; |
| 2032 |
} |
2032 |
} |
| 2033 |
|
2033 |
|
| 2034 |
// Implement some heroics to detect shifts of masked values where the mask can |
2034 |
// Implement some heroics to detect shifts of masked values where the mask can |
| 2035 |
// be replaced by extending the shift and undoing that in the addressing mode |
2035 |
// be replaced by extending the shift and undoing that in the addressing mode |
| 2036 |
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and |
2036 |
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and |
| 2037 |
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in |
2037 |
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in |
| 2038 |
// the addressing mode. This results in code such as: |
2038 |
// the addressing mode. This results in code such as: |
| 2039 |
// |
2039 |
// |
| 2040 |
// int f(short *y, int *lookup_table) { |
2040 |
// int f(short *y, int *lookup_table) { |
| 2041 |
// ... |
2041 |
// ... |
| 2042 |
// return *y + lookup_table[*y >> 11]; |
2042 |
// return *y + lookup_table[*y >> 11]; |
| 2043 |
// } |
2043 |
// } |
| 2044 |
// |
2044 |
// |
| 2045 |
// Turning into: |
2045 |
// Turning into: |
| 2046 |
// movzwl (%rdi), %eax |
2046 |
// movzwl (%rdi), %eax |
| 2047 |
// movl %eax, %ecx |
2047 |
// movl %eax, %ecx |
| 2048 |
// shrl $11, %ecx |
2048 |
// shrl $11, %ecx |
| 2049 |
// addl (%rsi,%rcx,4), %eax |
2049 |
// addl (%rsi,%rcx,4), %eax |
| 2050 |
// |
2050 |
// |
| 2051 |
// Instead of: |
2051 |
// Instead of: |
| 2052 |
// movzwl (%rdi), %eax |
2052 |
// movzwl (%rdi), %eax |
| 2053 |
// movl %eax, %ecx |
2053 |
// movl %eax, %ecx |
| 2054 |
// shrl $9, %ecx |
2054 |
// shrl $9, %ecx |
| 2055 |
// andl $124, %rcx |
2055 |
// andl $124, %rcx |
| 2056 |
// addl (%rsi,%rcx), %eax |
2056 |
// addl (%rsi,%rcx), %eax |
| 2057 |
// |
2057 |
// |
| 2058 |
// Note that this function assumes the mask is provided as a mask *after* the |
2058 |
// Note that this function assumes the mask is provided as a mask *after* the |
| 2059 |
// value is shifted. The input chain may or may not match that, but computing |
2059 |
// value is shifted. The input chain may or may not match that, but computing |
| 2060 |
// such a mask is trivial. |
2060 |
// such a mask is trivial. |
| 2061 |
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, |
2061 |
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, |
| 2062 |
uint64_t Mask, |
2062 |
uint64_t Mask, |
| 2063 |
SDValue Shift, SDValue X, |
2063 |
SDValue Shift, SDValue X, |
| 2064 |
X86ISelAddressMode &AM) { |
2064 |
X86ISelAddressMode &AM) { |
| 2065 |
if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() || |
2065 |
if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() || |
| 2066 |
!isa(Shift.getOperand(1))) |
2066 |
!isa(Shift.getOperand(1))) |
| 2067 |
return true; |
2067 |
return true; |
| 2068 |
|
2068 |
|
| 2069 |
unsigned ShiftAmt = Shift.getConstantOperandVal(1); |
2069 |
unsigned ShiftAmt = Shift.getConstantOperandVal(1); |
| 2070 |
unsigned MaskLZ = llvm::countl_zero(Mask); |
2070 |
unsigned MaskLZ = llvm::countl_zero(Mask); |
| 2071 |
unsigned MaskTZ = llvm::countr_zero(Mask); |
2071 |
unsigned MaskTZ = llvm::countr_zero(Mask); |
| 2072 |
|
2072 |
|
| 2073 |
// The amount of shift we're trying to fit into the addressing mode is taken |
2073 |
// The amount of shift we're trying to fit into the addressing mode is taken |
| 2074 |
// from the trailing zeros of the mask. |
2074 |
// from the trailing zeros of the mask. |
| 2075 |
unsigned AMShiftAmt = MaskTZ; |
2075 |
unsigned AMShiftAmt = MaskTZ; |
| 2076 |
|
2076 |
|
| 2077 |
// There is nothing we can do here unless the mask is removing some bits. |
2077 |
// There is nothing we can do here unless the mask is removing some bits. |
| 2078 |
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. |
2078 |
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. |
| 2079 |
if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; |
2079 |
if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; |
| 2080 |
|
2080 |
|
| 2081 |
// We also need to ensure that mask is a continuous run of bits. |
2081 |
// We also need to ensure that mask is a continuous run of bits. |
| 2082 |
if (llvm::countr_one(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) |
2082 |
if (llvm::countr_one(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) |
| 2083 |
return true; |
2083 |
return true; |
| 2084 |
|
2084 |
|
| 2085 |
// Scale the leading zero count down based on the actual size of the value. |
2085 |
// Scale the leading zero count down based on the actual size of the value. |
| 2086 |
// Also scale it down based on the size of the shift. |
2086 |
// Also scale it down based on the size of the shift. |
| 2087 |
unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; |
2087 |
unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; |
| 2088 |
if (MaskLZ < ScaleDown) |
2088 |
if (MaskLZ < ScaleDown) |
| 2089 |
return true; |
2089 |
return true; |
| 2090 |
MaskLZ -= ScaleDown; |
2090 |
MaskLZ -= ScaleDown; |
| 2091 |
|
2091 |
|
| 2092 |
// The final check is to ensure that any masked out high bits of X are |
2092 |
// The final check is to ensure that any masked out high bits of X are |
| 2093 |
// already known to be zero. Otherwise, the mask has a semantic impact |
2093 |
// already known to be zero. Otherwise, the mask has a semantic impact |
| 2094 |
// other than masking out a couple of low bits. Unfortunately, because of |
2094 |
// other than masking out a couple of low bits. Unfortunately, because of |
| 2095 |
// the mask, zero extensions will be removed from operands in some cases. |
2095 |
// the mask, zero extensions will be removed from operands in some cases. |
| 2096 |
// This code works extra hard to look through extensions because we can |
2096 |
// This code works extra hard to look through extensions because we can |
| 2097 |
// replace them with zero extensions cheaply if necessary. |
2097 |
// replace them with zero extensions cheaply if necessary. |
| 2098 |
bool ReplacingAnyExtend = false; |
2098 |
bool ReplacingAnyExtend = false; |
| 2099 |
if (X.getOpcode() == ISD::ANY_EXTEND) { |
2099 |
if (X.getOpcode() == ISD::ANY_EXTEND) { |
| 2100 |
unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() - |
2100 |
unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() - |
| 2101 |
X.getOperand(0).getSimpleValueType().getSizeInBits(); |
2101 |
X.getOperand(0).getSimpleValueType().getSizeInBits(); |
| 2102 |
// Assume that we'll replace the any-extend with a zero-extend, and |
2102 |
// Assume that we'll replace the any-extend with a zero-extend, and |
| 2103 |
// narrow the search to the extended value. |
2103 |
// narrow the search to the extended value. |
| 2104 |
X = X.getOperand(0); |
2104 |
X = X.getOperand(0); |
| 2105 |
MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits; |
2105 |
MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits; |
| 2106 |
ReplacingAnyExtend = true; |
2106 |
ReplacingAnyExtend = true; |
| 2107 |
} |
2107 |
} |
| 2108 |
APInt MaskedHighBits = |
2108 |
APInt MaskedHighBits = |
| 2109 |
APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); |
2109 |
APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); |
| 2110 |
KnownBits Known = DAG.computeKnownBits(X); |
2110 |
KnownBits Known = DAG.computeKnownBits(X); |
| 2111 |
if (MaskedHighBits != Known.Zero) return true; |
2111 |
if (MaskedHighBits != Known.Zero) return true; |
| 2112 |
|
2112 |
|
| 2113 |
// We've identified a pattern that can be transformed into a single shift |
2113 |
// We've identified a pattern that can be transformed into a single shift |
| 2114 |
// and an addressing mode. Make it so. |
2114 |
// and an addressing mode. Make it so. |
| 2115 |
MVT VT = N.getSimpleValueType(); |
2115 |
MVT VT = N.getSimpleValueType(); |
| 2116 |
if (ReplacingAnyExtend) { |
2116 |
if (ReplacingAnyExtend) { |
| 2117 |
assert(X.getValueType() != VT); |
2117 |
assert(X.getValueType() != VT); |
| 2118 |
// We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. |
2118 |
// We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. |
| 2119 |
SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X); |
2119 |
SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X); |
| 2120 |
insertDAGNode(DAG, N, NewX); |
2120 |
insertDAGNode(DAG, N, NewX); |
| 2121 |
X = NewX; |
2121 |
X = NewX; |
| 2122 |
} |
2122 |
} |
| 2123 |
SDLoc DL(N); |
2123 |
SDLoc DL(N); |
| 2124 |
SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); |
2124 |
SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); |
| 2125 |
SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); |
2125 |
SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); |
| 2126 |
SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); |
2126 |
SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); |
| 2127 |
SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt); |
2127 |
SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt); |
| 2128 |
|
2128 |
|
| 2129 |
// Insert the new nodes into the topological ordering. We must do this in |
2129 |
// Insert the new nodes into the topological ordering. We must do this in |
| 2130 |
// a valid topological ordering as nothing is going to go back and re-sort |
2130 |
// a valid topological ordering as nothing is going to go back and re-sort |
| 2131 |
// these nodes. We continually insert before 'N' in sequence as this is |
2131 |
// these nodes. We continually insert before 'N' in sequence as this is |
| 2132 |
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no |
2132 |
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no |
| 2133 |
// hierarchy left to express. |
2133 |
// hierarchy left to express. |
| 2134 |
insertDAGNode(DAG, N, NewSRLAmt); |
2134 |
insertDAGNode(DAG, N, NewSRLAmt); |
| 2135 |
insertDAGNode(DAG, N, NewSRL); |
2135 |
insertDAGNode(DAG, N, NewSRL); |
| 2136 |
insertDAGNode(DAG, N, NewSHLAmt); |
2136 |
insertDAGNode(DAG, N, NewSHLAmt); |
| 2137 |
insertDAGNode(DAG, N, NewSHL); |
2137 |
insertDAGNode(DAG, N, NewSHL); |
| 2138 |
DAG.ReplaceAllUsesWith(N, NewSHL); |
2138 |
DAG.ReplaceAllUsesWith(N, NewSHL); |
| 2139 |
DAG.RemoveDeadNode(N.getNode()); |
2139 |
DAG.RemoveDeadNode(N.getNode()); |
| 2140 |
|
2140 |
|
| 2141 |
AM.Scale = 1 << AMShiftAmt; |
2141 |
AM.Scale = 1 << AMShiftAmt; |
| 2142 |
AM.IndexReg = NewSRL; |
2142 |
AM.IndexReg = NewSRL; |
| 2143 |
return false; |
2143 |
return false; |
| 2144 |
} |
2144 |
} |
| 2145 |
|
2145 |
|
| 2146 |
// Transform "(X >> SHIFT) & (MASK << C1)" to |
2146 |
// Transform "(X >> SHIFT) & (MASK << C1)" to |
| 2147 |
// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be |
2147 |
// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be |
| 2148 |
// matched to a BEXTR later. Returns false if the simplification is performed. |
2148 |
// matched to a BEXTR later. Returns false if the simplification is performed. |
| 2149 |
static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N, |
2149 |
static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N, |
| 2150 |
uint64_t Mask, |
2150 |
uint64_t Mask, |
| 2151 |
SDValue Shift, SDValue X, |
2151 |
SDValue Shift, SDValue X, |
| 2152 |
X86ISelAddressMode &AM, |
2152 |
X86ISelAddressMode &AM, |
| 2153 |
const X86Subtarget &Subtarget) { |
2153 |
const X86Subtarget &Subtarget) { |
| 2154 |
if (Shift.getOpcode() != ISD::SRL || |
2154 |
if (Shift.getOpcode() != ISD::SRL || |
| 2155 |
!isa(Shift.getOperand(1)) || |
2155 |
!isa(Shift.getOperand(1)) || |
| 2156 |
!Shift.hasOneUse() || !N.hasOneUse()) |
2156 |
!Shift.hasOneUse() || !N.hasOneUse()) |
| 2157 |
return true; |
2157 |
return true; |
| 2158 |
|
2158 |
|
| 2159 |
// Only do this if BEXTR will be matched by matchBEXTRFromAndImm. |
2159 |
// Only do this if BEXTR will be matched by matchBEXTRFromAndImm. |
| 2160 |
if (!Subtarget.hasTBM() && |
2160 |
if (!Subtarget.hasTBM() && |
| 2161 |
!(Subtarget.hasBMI() && Subtarget.hasFastBEXTR())) |
2161 |
!(Subtarget.hasBMI() && Subtarget.hasFastBEXTR())) |
| 2162 |
return true; |
2162 |
return true; |
| 2163 |
|
2163 |
|
| 2164 |
// We need to ensure that mask is a continuous run of bits. |
2164 |
// We need to ensure that mask is a continuous run of bits. |
| 2165 |
if (!isShiftedMask_64(Mask)) return true; |
2165 |
if (!isShiftedMask_64(Mask)) return true; |
| 2166 |
|
2166 |
|
| 2167 |
unsigned ShiftAmt = Shift.getConstantOperandVal(1); |
2167 |
unsigned ShiftAmt = Shift.getConstantOperandVal(1); |
| 2168 |
|
2168 |
|
| 2169 |
// The amount of shift we're trying to fit into the addressing mode is taken |
2169 |
// The amount of shift we're trying to fit into the addressing mode is taken |
| 2170 |
// from the trailing zeros of the mask. |
2170 |
// from the trailing zeros of the mask. |
| 2171 |
unsigned AMShiftAmt = llvm::countr_zero(Mask); |
2171 |
unsigned AMShiftAmt = llvm::countr_zero(Mask); |
| 2172 |
|
2172 |
|
| 2173 |
// There is nothing we can do here unless the mask is removing some bits. |
2173 |
// There is nothing we can do here unless the mask is removing some bits. |
| 2174 |
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. |
2174 |
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. |
| 2175 |
if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; |
2175 |
if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; |
| 2176 |
|
2176 |
|
| 2177 |
MVT VT = N.getSimpleValueType(); |
2177 |
MVT VT = N.getSimpleValueType(); |
| 2178 |
SDLoc DL(N); |
2178 |
SDLoc DL(N); |
| 2179 |
SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); |
2179 |
SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); |
| 2180 |
SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); |
2180 |
SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); |
| 2181 |
SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT); |
2181 |
SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT); |
| 2182 |
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask); |
2182 |
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask); |
| 2183 |
SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); |
2183 |
SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); |
| 2184 |
SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt); |
2184 |
SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt); |
| 2185 |
|
2185 |
|
| 2186 |
// Insert the new nodes into the topological ordering. We must do this in |
2186 |
// Insert the new nodes into the topological ordering. We must do this in |
| 2187 |
// a valid topological ordering as nothing is going to go back and re-sort |
2187 |
// a valid topological ordering as nothing is going to go back and re-sort |
| 2188 |
// these nodes. We continually insert before 'N' in sequence as this is |
2188 |
// these nodes. We continually insert before 'N' in sequence as this is |
| 2189 |
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no |
2189 |
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no |
| 2190 |
// hierarchy left to express. |
2190 |
// hierarchy left to express. |
| 2191 |
insertDAGNode(DAG, N, NewSRLAmt); |
2191 |
insertDAGNode(DAG, N, NewSRLAmt); |
| 2192 |
insertDAGNode(DAG, N, NewSRL); |
2192 |
insertDAGNode(DAG, N, NewSRL); |
| 2193 |
insertDAGNode(DAG, N, NewMask); |
2193 |
insertDAGNode(DAG, N, NewMask); |
| 2194 |
insertDAGNode(DAG, N, NewAnd); |
2194 |
insertDAGNode(DAG, N, NewAnd); |
| 2195 |
insertDAGNode(DAG, N, NewSHLAmt); |
2195 |
insertDAGNode(DAG, N, NewSHLAmt); |
| 2196 |
insertDAGNode(DAG, N, NewSHL); |
2196 |
insertDAGNode(DAG, N, NewSHL); |
| 2197 |
DAG.ReplaceAllUsesWith(N, NewSHL); |
2197 |
DAG.ReplaceAllUsesWith(N, NewSHL); |
| 2198 |
DAG.RemoveDeadNode(N.getNode()); |
2198 |
DAG.RemoveDeadNode(N.getNode()); |
| 2199 |
|
2199 |
|
| 2200 |
AM.Scale = 1 << AMShiftAmt; |
2200 |
AM.Scale = 1 << AMShiftAmt; |
| 2201 |
AM.IndexReg = NewAnd; |
2201 |
AM.IndexReg = NewAnd; |
| 2202 |
return false; |
2202 |
return false; |
| 2203 |
} |
2203 |
} |
| 2204 |
|
2204 |
|
| 2205 |
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, |
2205 |
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, |
| 2206 |
unsigned Depth) { |
2206 |
unsigned Depth) { |
| 2207 |
SDLoc dl(N); |
2207 |
SDLoc dl(N); |
| 2208 |
LLVM_DEBUG({ |
2208 |
LLVM_DEBUG({ |
| 2209 |
dbgs() << "MatchAddress: "; |
2209 |
dbgs() << "MatchAddress: "; |
| 2210 |
AM.dump(CurDAG); |
2210 |
AM.dump(CurDAG); |
| 2211 |
}); |
2211 |
}); |
| 2212 |
// Limit recursion. |
2212 |
// Limit recursion. |
| 2213 |
if (Depth > 5) |
2213 |
if (Depth > 5) |
| 2214 |
return matchAddressBase(N, AM); |
2214 |
return matchAddressBase(N, AM); |
| 2215 |
|
2215 |
|
| 2216 |
// If this is already a %rip relative address, we can only merge immediates |
2216 |
// If this is already a %rip relative address, we can only merge immediates |
| 2217 |
// into it. Instead of handling this in every case, we handle it here. |
2217 |
// into it. Instead of handling this in every case, we handle it here. |
| 2218 |
// RIP relative addressing: %rip + 32-bit displacement! |
2218 |
// RIP relative addressing: %rip + 32-bit displacement! |
| 2219 |
if (AM.isRIPRelative()) { |
2219 |
if (AM.isRIPRelative()) { |
| 2220 |
// FIXME: JumpTable and ExternalSymbol address currently don't like |
2220 |
// FIXME: JumpTable and ExternalSymbol address currently don't like |
| 2221 |
// displacements. It isn't very important, but this should be fixed for |
2221 |
// displacements. It isn't very important, but this should be fixed for |
| 2222 |
// consistency. |
2222 |
// consistency. |
| 2223 |
if (!(AM.ES || AM.MCSym) && AM.JT != -1) |
2223 |
if (!(AM.ES || AM.MCSym) && AM.JT != -1) |
| 2224 |
return true; |
2224 |
return true; |
| 2225 |
|
2225 |
|
| 2226 |
if (auto *Cst = dyn_cast(N)) |
2226 |
if (auto *Cst = dyn_cast(N)) |
| 2227 |
if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM)) |
2227 |
if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM)) |
| 2228 |
return false; |
2228 |
return false; |
| 2229 |
return true; |
2229 |
return true; |
| 2230 |
} |
2230 |
} |
| 2231 |
|
2231 |
|
| 2232 |
switch (N.getOpcode()) { |
2232 |
switch (N.getOpcode()) { |
| 2233 |
default: break; |
2233 |
default: break; |
| 2234 |
case ISD::LOCAL_RECOVER: { |
2234 |
case ISD::LOCAL_RECOVER: { |
| 2235 |
if (!AM.hasSymbolicDisplacement() && AM.Disp == 0) |
2235 |
if (!AM.hasSymbolicDisplacement() && AM.Disp == 0) |
| 2236 |
if (const auto *ESNode = dyn_cast(N.getOperand(0))) { |
2236 |
if (const auto *ESNode = dyn_cast(N.getOperand(0))) { |
| 2237 |
// Use the symbol and don't prefix it. |
2237 |
// Use the symbol and don't prefix it. |
| 2238 |
AM.MCSym = ESNode->getMCSymbol(); |
2238 |
AM.MCSym = ESNode->getMCSymbol(); |
| 2239 |
return false; |
2239 |
return false; |
| 2240 |
} |
2240 |
} |
| 2241 |
break; |
2241 |
break; |
| 2242 |
} |
2242 |
} |
| 2243 |
case ISD::Constant: { |
2243 |
case ISD::Constant: { |
| 2244 |
uint64_t Val = cast(N)->getSExtValue(); |
2244 |
uint64_t Val = cast(N)->getSExtValue(); |
| 2245 |
if (!foldOffsetIntoAddress(Val, AM)) |
2245 |
if (!foldOffsetIntoAddress(Val, AM)) |
| 2246 |
return false; |
2246 |
return false; |
| 2247 |
break; |
2247 |
break; |
| 2248 |
} |
2248 |
} |
| 2249 |
|
2249 |
|
| 2250 |
case X86ISD::Wrapper: |
2250 |
case X86ISD::Wrapper: |
| 2251 |
case X86ISD::WrapperRIP: |
2251 |
case X86ISD::WrapperRIP: |
| 2252 |
if (!matchWrapper(N, AM)) |
2252 |
if (!matchWrapper(N, AM)) |
| 2253 |
return false; |
2253 |
return false; |
| 2254 |
break; |
2254 |
break; |
| 2255 |
|
2255 |
|
| 2256 |
case ISD::LOAD: |
2256 |
case ISD::LOAD: |
| 2257 |
if (!matchLoadInAddress(cast(N), AM)) |
2257 |
if (!matchLoadInAddress(cast(N), AM)) |
| 2258 |
return false; |
2258 |
return false; |
| 2259 |
break; |
2259 |
break; |
| 2260 |
|
2260 |
|
| 2261 |
case ISD::FrameIndex: |
2261 |
case ISD::FrameIndex: |
| 2262 |
if (AM.BaseType == X86ISelAddressMode::RegBase && |
2262 |
if (AM.BaseType == X86ISelAddressMode::RegBase && |
| 2263 |
AM.Base_Reg.getNode() == nullptr && |
2263 |
AM.Base_Reg.getNode() == nullptr && |
| 2264 |
(!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { |
2264 |
(!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { |
| 2265 |
AM.BaseType = X86ISelAddressMode::FrameIndexBase; |
2265 |
AM.BaseType = X86ISelAddressMode::FrameIndexBase; |
| 2266 |
AM.Base_FrameIndex = cast(N)->getIndex(); |
2266 |
AM.Base_FrameIndex = cast(N)->getIndex(); |
| 2267 |
return false; |
2267 |
return false; |
| 2268 |
} |
2268 |
} |
| 2269 |
break; |
2269 |
break; |
| 2270 |
|
2270 |
|
| 2271 |
case ISD::SHL: |
2271 |
case ISD::SHL: |
| 2272 |
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) |
2272 |
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) |
| 2273 |
break; |
2273 |
break; |
| 2274 |
|
2274 |
|
| 2275 |
if (auto *CN = dyn_cast(N.getOperand(1))) { |
2275 |
if (auto *CN = dyn_cast(N.getOperand(1))) { |
| 2276 |
unsigned Val = CN->getZExtValue(); |
2276 |
unsigned Val = CN->getZExtValue(); |
| 2277 |
// Note that we handle x<<1 as (,x,2) rather than (x,x) here so |
2277 |
// Note that we handle x<<1 as (,x,2) rather than (x,x) here so |
| 2278 |
// that the base operand remains free for further matching. If |
2278 |
// that the base operand remains free for further matching. If |
| 2279 |
// the base doesn't end up getting used, a post-processing step |
2279 |
// the base doesn't end up getting used, a post-processing step |
| 2280 |
// in MatchAddress turns (,x,2) into (x,x), which is cheaper. |
2280 |
// in MatchAddress turns (,x,2) into (x,x), which is cheaper. |
| 2281 |
if (Val == 1 || Val == 2 || Val == 3) { |
2281 |
if (Val == 1 || Val == 2 || Val == 3) { |
| 2282 |
AM.Scale = 1 << Val; |
2282 |
AM.Scale = 1 << Val; |
| 2283 |
SDValue ShVal = N.getOperand(0); |
2283 |
SDValue ShVal = N.getOperand(0); |
| 2284 |
|
2284 |
|
| 2285 |
// Okay, we know that we have a scale by now. However, if the scaled |
2285 |
// Okay, we know that we have a scale by now. However, if the scaled |
| 2286 |
// value is an add of something and a constant, we can fold the |
2286 |
// value is an add of something and a constant, we can fold the |
| 2287 |
// constant into the disp field here. |
2287 |
// constant into the disp field here. |
| 2288 |
if (CurDAG->isBaseWithConstantOffset(ShVal)) { |
2288 |
if (CurDAG->isBaseWithConstantOffset(ShVal)) { |
| 2289 |
AM.IndexReg = ShVal.getOperand(0); |
2289 |
AM.IndexReg = ShVal.getOperand(0); |
| 2290 |
auto *AddVal = cast(ShVal.getOperand(1)); |
2290 |
auto *AddVal = cast(ShVal.getOperand(1)); |
| 2291 |
uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val; |
2291 |
uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val; |
| 2292 |
if (!foldOffsetIntoAddress(Disp, AM)) |
2292 |
if (!foldOffsetIntoAddress(Disp, AM)) |
| 2293 |
return false; |
2293 |
return false; |
| 2294 |
} |
2294 |
} |
| 2295 |
|
2295 |
|
| 2296 |
AM.IndexReg = ShVal; |
2296 |
AM.IndexReg = ShVal; |
| 2297 |
return false; |
2297 |
return false; |
| 2298 |
} |
2298 |
} |
| 2299 |
} |
2299 |
} |
| 2300 |
break; |
2300 |
break; |
| 2301 |
|
2301 |
|
| 2302 |
case ISD::SRL: { |
2302 |
case ISD::SRL: { |
| 2303 |
// Scale must not be used already. |
2303 |
// Scale must not be used already. |
| 2304 |
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; |
2304 |
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; |
| 2305 |
|
2305 |
|
| 2306 |
// We only handle up to 64-bit values here as those are what matter for |
2306 |
// We only handle up to 64-bit values here as those are what matter for |
| 2307 |
// addressing mode optimizations. |
2307 |
// addressing mode optimizations. |
| 2308 |
assert(N.getSimpleValueType().getSizeInBits() <= 64 && |
2308 |
assert(N.getSimpleValueType().getSizeInBits() <= 64 && |
| 2309 |
"Unexpected value size!"); |
2309 |
"Unexpected value size!"); |
| 2310 |
|
2310 |
|
| 2311 |
SDValue And = N.getOperand(0); |
2311 |
SDValue And = N.getOperand(0); |
| 2312 |
if (And.getOpcode() != ISD::AND) break; |
2312 |
if (And.getOpcode() != ISD::AND) break; |
| 2313 |
SDValue X = And.getOperand(0); |
2313 |
SDValue X = And.getOperand(0); |
| 2314 |
|
2314 |
|
| 2315 |
// The mask used for the transform is expected to be post-shift, but we |
2315 |
// The mask used for the transform is expected to be post-shift, but we |
| 2316 |
// found the shift first so just apply the shift to the mask before passing |
2316 |
// found the shift first so just apply the shift to the mask before passing |
| 2317 |
// it down. |
2317 |
// it down. |
| 2318 |
if (!isa(N.getOperand(1)) || |
2318 |
if (!isa(N.getOperand(1)) || |
| 2319 |
!isa(And.getOperand(1))) |
2319 |
!isa(And.getOperand(1))) |
| 2320 |
break; |
2320 |
break; |
| 2321 |
uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1); |
2321 |
uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1); |
| 2322 |
|
2322 |
|
| 2323 |
// Try to fold the mask and shift into the scale, and return false if we |
2323 |
// Try to fold the mask and shift into the scale, and return false if we |
| 2324 |
// succeed. |
2324 |
// succeed. |
| 2325 |
if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM)) |
2325 |
if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM)) |
| 2326 |
return false; |
2326 |
return false; |
| 2327 |
break; |
2327 |
break; |
| 2328 |
} |
2328 |
} |
| 2329 |
|
2329 |
|
| 2330 |
case ISD::SMUL_LOHI: |
2330 |
case ISD::SMUL_LOHI: |
| 2331 |
case ISD::UMUL_LOHI: |
2331 |
case ISD::UMUL_LOHI: |
| 2332 |
// A mul_lohi where we need the low part can be folded as a plain multiply. |
2332 |
// A mul_lohi where we need the low part can be folded as a plain multiply. |
| 2333 |
if (N.getResNo() != 0) break; |
2333 |
if (N.getResNo() != 0) break; |
| 2334 |
[[fallthrough]]; |
2334 |
[[fallthrough]]; |
| 2335 |
case ISD::MUL: |
2335 |
case ISD::MUL: |
| 2336 |
case X86ISD::MUL_IMM: |
2336 |
case X86ISD::MUL_IMM: |
| 2337 |
// X*[3,5,9] -> X+X*[2,4,8] |
2337 |
// X*[3,5,9] -> X+X*[2,4,8] |
| 2338 |
if (AM.BaseType == X86ISelAddressMode::RegBase && |
2338 |
if (AM.BaseType == X86ISelAddressMode::RegBase && |
| 2339 |
AM.Base_Reg.getNode() == nullptr && |
2339 |
AM.Base_Reg.getNode() == nullptr && |
| 2340 |
AM.IndexReg.getNode() == nullptr) { |
2340 |
AM.IndexReg.getNode() == nullptr) { |
| 2341 |
if (auto *CN = dyn_cast(N.getOperand(1))) |
2341 |
if (auto *CN = dyn_cast(N.getOperand(1))) |
| 2342 |
if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || |
2342 |
if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || |
| 2343 |
CN->getZExtValue() == 9) { |
2343 |
CN->getZExtValue() == 9) { |
| 2344 |
AM.Scale = unsigned(CN->getZExtValue())-1; |
2344 |
AM.Scale = unsigned(CN->getZExtValue())-1; |
| 2345 |
|
2345 |
|
| 2346 |
SDValue MulVal = N.getOperand(0); |
2346 |
SDValue MulVal = N.getOperand(0); |
| 2347 |
SDValue Reg; |
2347 |
SDValue Reg; |
| 2348 |
|
2348 |
|
| 2349 |
// Okay, we know that we have a scale by now. However, if the scaled |
2349 |
// Okay, we know that we have a scale by now. However, if the scaled |
| 2350 |
// value is an add of something and a constant, we can fold the |
2350 |
// value is an add of something and a constant, we can fold the |
| 2351 |
// constant into the disp field here. |
2351 |
// constant into the disp field here. |
| 2352 |
if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && |
2352 |
if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && |
| 2353 |
isa(MulVal.getOperand(1))) { |
2353 |
isa(MulVal.getOperand(1))) { |
| 2354 |
Reg = MulVal.getOperand(0); |
2354 |
Reg = MulVal.getOperand(0); |
| 2355 |
auto *AddVal = cast(MulVal.getOperand(1)); |
2355 |
auto *AddVal = cast(MulVal.getOperand(1)); |
| 2356 |
uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue(); |
2356 |
uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue(); |
| 2357 |
if (foldOffsetIntoAddress(Disp, AM)) |
2357 |
if (foldOffsetIntoAddress(Disp, AM)) |
| 2358 |
Reg = N.getOperand(0); |
2358 |
Reg = N.getOperand(0); |
| 2359 |
} else { |
2359 |
} else { |
| 2360 |
Reg = N.getOperand(0); |
2360 |
Reg = N.getOperand(0); |
| 2361 |
} |
2361 |
} |
| 2362 |
|
2362 |
|
| 2363 |
AM.IndexReg = AM.Base_Reg = Reg; |
2363 |
AM.IndexReg = AM.Base_Reg = Reg; |
| 2364 |
return false; |
2364 |
return false; |
| 2365 |
} |
2365 |
} |
| 2366 |
} |
2366 |
} |
| 2367 |
break; |
2367 |
break; |
| 2368 |
|
2368 |
|
| 2369 |
case ISD::SUB: { |
2369 |
case ISD::SUB: { |
| 2370 |
// Given A-B, if A can be completely folded into the address and |
2370 |
// Given A-B, if A can be completely folded into the address and |
| 2371 |
// the index field with the index field unused, use -B as the index. |
2371 |
// the index field with the index field unused, use -B as the index. |
| 2372 |
// This is a win if a has multiple parts that can be folded into |
2372 |
// This is a win if a has multiple parts that can be folded into |
| 2373 |
// the address. Also, this saves a mov if the base register has |
2373 |
// the address. Also, this saves a mov if the base register has |
| 2374 |
// other uses, since it avoids a two-address sub instruction, however |
2374 |
// other uses, since it avoids a two-address sub instruction, however |
| 2375 |
// it costs an additional mov if the index register has other uses. |
2375 |
// it costs an additional mov if the index register has other uses. |
| 2376 |
|
2376 |
|
| 2377 |
// Add an artificial use to this node so that we can keep track of |
2377 |
// Add an artificial use to this node so that we can keep track of |
| 2378 |
// it if it gets CSE'd with a different node. |
2378 |
// it if it gets CSE'd with a different node. |
| 2379 |
HandleSDNode Handle(N); |
2379 |
HandleSDNode Handle(N); |
| 2380 |
|
2380 |
|
| 2381 |
// Test if the LHS of the sub can be folded. |
2381 |
// Test if the LHS of the sub can be folded. |
| 2382 |
X86ISelAddressMode Backup = AM; |
2382 |
X86ISelAddressMode Backup = AM; |
| 2383 |
if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) { |
2383 |
if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) { |
| 2384 |
N = Handle.getValue(); |
2384 |
N = Handle.getValue(); |
| 2385 |
AM = Backup; |
2385 |
AM = Backup; |
| 2386 |
break; |
2386 |
break; |
| 2387 |
} |
2387 |
} |
| 2388 |
N = Handle.getValue(); |
2388 |
N = Handle.getValue(); |
| 2389 |
// Test if the index field is free for use. |
2389 |
// Test if the index field is free for use. |
| 2390 |
if (AM.IndexReg.getNode() || AM.isRIPRelative()) { |
2390 |
if (AM.IndexReg.getNode() || AM.isRIPRelative()) { |
| 2391 |
AM = Backup; |
2391 |
AM = Backup; |
| 2392 |
break; |
2392 |
break; |
| 2393 |
} |
2393 |
} |
| 2394 |
|
2394 |
|
| 2395 |
int Cost = 0; |
2395 |
int Cost = 0; |
| 2396 |
SDValue RHS = N.getOperand(1); |
2396 |
SDValue RHS = N.getOperand(1); |
| 2397 |
// If the RHS involves a register with multiple uses, this |
2397 |
// If the RHS involves a register with multiple uses, this |
| 2398 |
// transformation incurs an extra mov, due to the neg instruction |
2398 |
// transformation incurs an extra mov, due to the neg instruction |
| 2399 |
// clobbering its operand. |
2399 |
// clobbering its operand. |
| 2400 |
if (!RHS.getNode()->hasOneUse() || |
2400 |
if (!RHS.getNode()->hasOneUse() || |
| 2401 |
RHS.getNode()->getOpcode() == ISD::CopyFromReg || |
2401 |
RHS.getNode()->getOpcode() == ISD::CopyFromReg || |
| 2402 |
RHS.getNode()->getOpcode() == ISD::TRUNCATE || |
2402 |
RHS.getNode()->getOpcode() == ISD::TRUNCATE || |
| 2403 |
RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || |
2403 |
RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || |
| 2404 |
(RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && |
2404 |
(RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && |
| 2405 |
RHS.getOperand(0).getValueType() == MVT::i32)) |
2405 |
RHS.getOperand(0).getValueType() == MVT::i32)) |
| 2406 |
++Cost; |
2406 |
++Cost; |
| 2407 |
// If the base is a register with multiple uses, this |
2407 |
// If the base is a register with multiple uses, this |
| 2408 |
// transformation may save a mov. |
2408 |
// transformation may save a mov. |
| 2409 |
if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() && |
2409 |
if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() && |
| 2410 |
!AM.Base_Reg.getNode()->hasOneUse()) || |
2410 |
!AM.Base_Reg.getNode()->hasOneUse()) || |
| 2411 |
AM.BaseType == X86ISelAddressMode::FrameIndexBase) |
2411 |
AM.BaseType == X86ISelAddressMode::FrameIndexBase) |
| 2412 |
--Cost; |
2412 |
--Cost; |
| 2413 |
// If the folded LHS was interesting, this transformation saves |
2413 |
// If the folded LHS was interesting, this transformation saves |
| 2414 |
// address arithmetic. |
2414 |
// address arithmetic. |
| 2415 |
if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + |
2415 |
if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + |
| 2416 |
((AM.Disp != 0) && (Backup.Disp == 0)) + |
2416 |
((AM.Disp != 0) && (Backup.Disp == 0)) + |
| 2417 |
(AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) |
2417 |
(AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) |
| 2418 |
--Cost; |
2418 |
--Cost; |
| 2419 |
// If it doesn't look like it may be an overall win, don't do it. |
2419 |
// If it doesn't look like it may be an overall win, don't do it. |
| 2420 |
if (Cost >= 0) { |
2420 |
if (Cost >= 0) { |
| 2421 |
AM = Backup; |
2421 |
AM = Backup; |
| 2422 |
break; |
2422 |
break; |
| 2423 |
} |
2423 |
} |
| 2424 |
|
2424 |
|
| 2425 |
// Ok, the transformation is legal and appears profitable. Go for it. |
2425 |
// Ok, the transformation is legal and appears profitable. Go for it. |
| 2426 |
// Negation will be emitted later to avoid creating dangling nodes if this |
2426 |
// Negation will be emitted later to avoid creating dangling nodes if this |
| 2427 |
// was an unprofitable LEA. |
2427 |
// was an unprofitable LEA. |
| 2428 |
AM.IndexReg = RHS; |
2428 |
AM.IndexReg = RHS; |
| 2429 |
AM.NegateIndex = true; |
2429 |
AM.NegateIndex = true; |
| 2430 |
AM.Scale = 1; |
2430 |
AM.Scale = 1; |
| 2431 |
return false; |
2431 |
return false; |
| 2432 |
} |
2432 |
} |
| 2433 |
|
2433 |
|
| 2434 |
case ISD::ADD: |
2434 |
case ISD::ADD: |
| 2435 |
if (!matchAdd(N, AM, Depth)) |
2435 |
if (!matchAdd(N, AM, Depth)) |
| 2436 |
return false; |
2436 |
return false; |
| 2437 |
break; |
2437 |
break; |
| 2438 |
|
2438 |
|
| 2439 |
case ISD::OR: |
2439 |
case ISD::OR: |
| 2440 |
// We want to look through a transform in InstCombine and DAGCombiner that |
2440 |
// We want to look through a transform in InstCombine and DAGCombiner that |
| 2441 |
// turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'. |
2441 |
// turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'. |
| 2442 |
// Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3)) |
2442 |
// Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3)) |
| 2443 |
// An 'lea' can then be used to match the shift (multiply) and add: |
2443 |
// An 'lea' can then be used to match the shift (multiply) and add: |
| 2444 |
// and $1, %esi |
2444 |
// and $1, %esi |
| 2445 |
// lea (%rsi, %rdi, 8), %rax |
2445 |
// lea (%rsi, %rdi, 8), %rax |
| 2446 |
if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) && |
2446 |
if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) && |
| 2447 |
!matchAdd(N, AM, Depth)) |
2447 |
!matchAdd(N, AM, Depth)) |
| 2448 |
return false; |
2448 |
return false; |
| 2449 |
break; |
2449 |
break; |
| 2450 |
|
2450 |
|
| 2451 |
case ISD::XOR: |
2451 |
case ISD::XOR: |
| 2452 |
// We want to look through a transform in InstCombine that |
2452 |
// We want to look through a transform in InstCombine that |
| 2453 |
// turns 'add' with min_signed_val into 'xor', so we can treat this 'xor' |
2453 |
// turns 'add' with min_signed_val into 'xor', so we can treat this 'xor' |
| 2454 |
// exactly like an 'add'. |
2454 |
// exactly like an 'add'. |
| 2455 |
if (isMinSignedConstant(N.getOperand(1)) && !matchAdd(N, AM, Depth)) |
2455 |
if (isMinSignedConstant(N.getOperand(1)) && !matchAdd(N, AM, Depth)) |
| 2456 |
return false; |
2456 |
return false; |
| 2457 |
break; |
2457 |
break; |
| 2458 |
|
2458 |
|
| 2459 |
case ISD::AND: { |
2459 |
case ISD::AND: { |
| 2460 |
// Perform some heroic transforms on an and of a constant-count shift |
2460 |
// Perform some heroic transforms on an and of a constant-count shift |
| 2461 |
// with a constant to enable use of the scaled offset field. |
2461 |
// with a constant to enable use of the scaled offset field. |
| 2462 |
|
2462 |
|
| 2463 |
// Scale must not be used already. |
2463 |
// Scale must not be used already. |
| 2464 |
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; |
2464 |
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; |
| 2465 |
|
2465 |
|
| 2466 |
// We only handle up to 64-bit values here as those are what matter for |
2466 |
// We only handle up to 64-bit values here as those are what matter for |
| 2467 |
// addressing mode optimizations. |
2467 |
// addressing mode optimizations. |
| 2468 |
assert(N.getSimpleValueType().getSizeInBits() <= 64 && |
2468 |
assert(N.getSimpleValueType().getSizeInBits() <= 64 && |
| 2469 |
"Unexpected value size!"); |
2469 |
"Unexpected value size!"); |
| 2470 |
|
2470 |
|
| 2471 |
if (!isa(N.getOperand(1))) |
2471 |
if (!isa(N.getOperand(1))) |
| 2472 |
break; |
2472 |
break; |
| 2473 |
|
2473 |
|
| 2474 |
if (N.getOperand(0).getOpcode() == ISD::SRL) { |
2474 |
if (N.getOperand(0).getOpcode() == ISD::SRL) { |
| 2475 |
SDValue Shift = N.getOperand(0); |
2475 |
SDValue Shift = N.getOperand(0); |
| 2476 |
SDValue X = Shift.getOperand(0); |
2476 |
SDValue X = Shift.getOperand(0); |
| 2477 |
|
2477 |
|
| 2478 |
uint64_t Mask = N.getConstantOperandVal(1); |
2478 |
uint64_t Mask = N.getConstantOperandVal(1); |
| 2479 |
|
2479 |
|
| 2480 |
// Try to fold the mask and shift into an extract and scale. |
2480 |
// Try to fold the mask and shift into an extract and scale. |
| 2481 |
if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM)) |
2481 |
if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM)) |
| 2482 |
return false; |
2482 |
return false; |
| 2483 |
|
2483 |
|
| 2484 |
// Try to fold the mask and shift directly into the scale. |
2484 |
// Try to fold the mask and shift directly into the scale. |
| 2485 |
if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM)) |
2485 |
if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM)) |
| 2486 |
return false; |
2486 |
return false; |
| 2487 |
|
2487 |
|
| 2488 |
// Try to fold the mask and shift into BEXTR and scale. |
2488 |
// Try to fold the mask and shift into BEXTR and scale. |
| 2489 |
if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget)) |
2489 |
if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget)) |
| 2490 |
return false; |
2490 |
return false; |
| 2491 |
} |
2491 |
} |
| 2492 |
|
2492 |
|
| 2493 |
// Try to swap the mask and shift to place shifts which can be done as |
2493 |
// Try to swap the mask and shift to place shifts which can be done as |
| 2494 |
// a scale on the outside of the mask. |
2494 |
// a scale on the outside of the mask. |
| 2495 |
if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM)) |
2495 |
if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM)) |
| 2496 |
return false; |
2496 |
return false; |
| 2497 |
|
2497 |
|
| 2498 |
break; |
2498 |
break; |
| 2499 |
} |
2499 |
} |
| 2500 |
case ISD::ZERO_EXTEND: { |
2500 |
case ISD::ZERO_EXTEND: { |
| 2501 |
// Try to widen a zexted shift left to the same size as its use, so we can |
2501 |
// Try to widen a zexted shift left to the same size as its use, so we can |
| 2502 |
// match the shift as a scale factor. |
2502 |
// match the shift as a scale factor. |
| 2503 |
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) |
2503 |
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) |
| 2504 |
break; |
2504 |
break; |
| 2505 |
|
2505 |
|
| 2506 |
// Peek through mask: zext(and(shl(x,c1),c2)) |
2506 |
// Peek through mask: zext(and(shl(x,c1),c2)) |
| 2507 |
SDValue Src = N.getOperand(0); |
2507 |
SDValue Src = N.getOperand(0); |
| 2508 |
APInt Mask = APInt::getAllOnes(Src.getScalarValueSizeInBits()); |
2508 |
APInt Mask = APInt::getAllOnes(Src.getScalarValueSizeInBits()); |
| 2509 |
if (Src.getOpcode() == ISD::AND && Src.hasOneUse()) |
2509 |
if (Src.getOpcode() == ISD::AND && Src.hasOneUse()) |
| 2510 |
if (auto *MaskC = dyn_cast(Src.getOperand(1))) { |
2510 |
if (auto *MaskC = dyn_cast(Src.getOperand(1))) { |
| 2511 |
Mask = MaskC->getAPIntValue(); |
2511 |
Mask = MaskC->getAPIntValue(); |
| 2512 |
Src = Src.getOperand(0); |
2512 |
Src = Src.getOperand(0); |
| 2513 |
} |
2513 |
} |
| 2514 |
|
2514 |
|
| 2515 |
if (Src.getOpcode() == ISD::SHL && Src.hasOneUse()) { |
2515 |
if (Src.getOpcode() == ISD::SHL && Src.hasOneUse()) { |
| 2516 |
// Give up if the shift is not a valid scale factor [1,2,3]. |
2516 |
// Give up if the shift is not a valid scale factor [1,2,3]. |
| 2517 |
SDValue ShlSrc = Src.getOperand(0); |
2517 |
SDValue ShlSrc = Src.getOperand(0); |
| 2518 |
SDValue ShlAmt = Src.getOperand(1); |
2518 |
SDValue ShlAmt = Src.getOperand(1); |
| 2519 |
auto *ShAmtC = dyn_cast(ShlAmt); |
2519 |
auto *ShAmtC = dyn_cast(ShlAmt); |
| 2520 |
if (!ShAmtC) |
2520 |
if (!ShAmtC) |
| 2521 |
break; |
2521 |
break; |
| 2522 |
unsigned ShAmtV = ShAmtC->getZExtValue(); |
2522 |
unsigned ShAmtV = ShAmtC->getZExtValue(); |
| 2523 |
if (ShAmtV > 3) |
2523 |
if (ShAmtV > 3) |
| 2524 |
break; |
2524 |
break; |
| 2525 |
|
2525 |
|
| 2526 |
// The narrow shift must only shift out zero bits (it must be 'nuw'). |
2526 |
// The narrow shift must only shift out zero bits (it must be 'nuw'). |
| 2527 |
// That makes it safe to widen to the destination type. |
2527 |
// That makes it safe to widen to the destination type. |
| 2528 |
APInt HighZeros = |
2528 |
APInt HighZeros = |
| 2529 |
APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV); |
2529 |
APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV); |
| 2530 |
if (!CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask)) |
2530 |
if (!CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask)) |
| 2531 |
break; |
2531 |
break; |
| 2532 |
|
2532 |
|
| 2533 |
// zext (shl nuw i8 %x, C1) to i32 |
2533 |
// zext (shl nuw i8 %x, C1) to i32 |
| 2534 |
// --> shl (zext i8 %x to i32), (zext C1) |
2534 |
// --> shl (zext i8 %x to i32), (zext C1) |
| 2535 |
// zext (and (shl nuw i8 %x, C1), C2) to i32 |
2535 |
// zext (and (shl nuw i8 %x, C1), C2) to i32 |
| 2536 |
// --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1) |
2536 |
// --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1) |
| 2537 |
MVT SrcVT = ShlSrc.getSimpleValueType(); |
2537 |
MVT SrcVT = ShlSrc.getSimpleValueType(); |
| 2538 |
MVT VT = N.getSimpleValueType(); |
2538 |
MVT VT = N.getSimpleValueType(); |
| 2539 |
SDLoc DL(N); |
2539 |
SDLoc DL(N); |
| 2540 |
|
2540 |
|
| 2541 |
SDValue Res = ShlSrc; |
2541 |
SDValue Res = ShlSrc; |
| 2542 |
if (!Mask.isAllOnes()) { |
2542 |
if (!Mask.isAllOnes()) { |
| 2543 |
Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT); |
2543 |
Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT); |
| 2544 |
insertDAGNode(*CurDAG, N, Res); |
2544 |
insertDAGNode(*CurDAG, N, Res); |
| 2545 |
Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res); |
2545 |
Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res); |
| 2546 |
insertDAGNode(*CurDAG, N, Res); |
2546 |
insertDAGNode(*CurDAG, N, Res); |
| 2547 |
} |
2547 |
} |
| 2548 |
SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res); |
2548 |
SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res); |
| 2549 |
insertDAGNode(*CurDAG, N, Zext); |
2549 |
insertDAGNode(*CurDAG, N, Zext); |
| 2550 |
SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt); |
2550 |
SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt); |
| 2551 |
insertDAGNode(*CurDAG, N, NewShl); |
2551 |
insertDAGNode(*CurDAG, N, NewShl); |
| 2552 |
|
2552 |
|
| 2553 |
// Convert the shift to scale factor. |
2553 |
// Convert the shift to scale factor. |
| 2554 |
AM.Scale = 1 << ShAmtV; |
2554 |
AM.Scale = 1 << ShAmtV; |
| 2555 |
AM.IndexReg = Zext; |
2555 |
AM.IndexReg = Zext; |
| 2556 |
|
2556 |
|
| 2557 |
CurDAG->ReplaceAllUsesWith(N, NewShl); |
2557 |
CurDAG->ReplaceAllUsesWith(N, NewShl); |
| 2558 |
CurDAG->RemoveDeadNode(N.getNode()); |
2558 |
CurDAG->RemoveDeadNode(N.getNode()); |
| 2559 |
return false; |
2559 |
return false; |
| 2560 |
} |
2560 |
} |
| 2561 |
|
2561 |
|
| 2562 |
// Try to fold the mask and shift into an extract and scale. |
2562 |
// Try to fold the mask and shift into an extract and scale. |
| 2563 |
if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes() && |
2563 |
if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes() && |
| 2564 |
!foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src, |
2564 |
!foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src, |
| 2565 |
Src.getOperand(0), AM)) |
2565 |
Src.getOperand(0), AM)) |
| 2566 |
return false; |
2566 |
return false; |
| 2567 |
|
2567 |
|
| 2568 |
break; |
2568 |
break; |
| 2569 |
} |
2569 |
} |
| 2570 |
} |
2570 |
} |
| 2571 |
|
2571 |
|
| 2572 |
return matchAddressBase(N, AM); |
2572 |
return matchAddressBase(N, AM); |
| 2573 |
} |
2573 |
} |
| 2574 |
|
2574 |
|
| 2575 |
/// Helper for MatchAddress. Add the specified node to the |
2575 |
/// Helper for MatchAddress. Add the specified node to the |
| 2576 |
/// specified addressing mode without any further recursion. |
2576 |
/// specified addressing mode without any further recursion. |
| 2577 |
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { |
2577 |
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { |
| 2578 |
// Is the base register already occupied? |
2578 |
// Is the base register already occupied? |
| 2579 |
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { |
2579 |
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { |
| 2580 |
// If so, check to see if the scale index register is set. |
2580 |
// If so, check to see if the scale index register is set. |
| 2581 |
if (!AM.IndexReg.getNode()) { |
2581 |
if (!AM.IndexReg.getNode()) { |
| 2582 |
AM.IndexReg = N; |
2582 |
AM.IndexReg = N; |
| 2583 |
AM.Scale = 1; |
2583 |
AM.Scale = 1; |
| 2584 |
return false; |
2584 |
return false; |
| 2585 |
} |
2585 |
} |
| 2586 |
|
2586 |
|
| 2587 |
// Otherwise, we cannot select it. |
2587 |
// Otherwise, we cannot select it. |
| 2588 |
return true; |
2588 |
return true; |
| 2589 |
} |
2589 |
} |
| 2590 |
|
2590 |
|
| 2591 |
// Default, generate it as a register. |
2591 |
// Default, generate it as a register. |
| 2592 |
AM.BaseType = X86ISelAddressMode::RegBase; |
2592 |
AM.BaseType = X86ISelAddressMode::RegBase; |
| 2593 |
AM.Base_Reg = N; |
2593 |
AM.Base_Reg = N; |
| 2594 |
return false; |
2594 |
return false; |
| 2595 |
} |
2595 |
} |
| 2596 |
|
2596 |
|
| 2597 |
bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N, |
2597 |
bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N, |
| 2598 |
X86ISelAddressMode &AM, |
2598 |
X86ISelAddressMode &AM, |
| 2599 |
unsigned Depth) { |
2599 |
unsigned Depth) { |
| 2600 |
SDLoc dl(N); |
2600 |
SDLoc dl(N); |
| 2601 |
LLVM_DEBUG({ |
2601 |
LLVM_DEBUG({ |
| 2602 |
dbgs() << "MatchVectorAddress: "; |
2602 |
dbgs() << "MatchVectorAddress: "; |
| 2603 |
AM.dump(CurDAG); |
2603 |
AM.dump(CurDAG); |
| 2604 |
}); |
2604 |
}); |
| 2605 |
// Limit recursion. |
2605 |
// Limit recursion. |
| 2606 |
if (Depth > 5) |
2606 |
if (Depth > 5) |
| 2607 |
return matchAddressBase(N, AM); |
2607 |
return matchAddressBase(N, AM); |
| 2608 |
|
2608 |
|
| 2609 |
// TODO: Support other operations. |
2609 |
// TODO: Support other operations. |
| 2610 |
switch (N.getOpcode()) { |
2610 |
switch (N.getOpcode()) { |
| 2611 |
case ISD::Constant: { |
2611 |
case ISD::Constant: { |
| 2612 |
uint64_t Val = cast(N)->getSExtValue(); |
2612 |
uint64_t Val = cast(N)->getSExtValue(); |
| 2613 |
if (!foldOffsetIntoAddress(Val, AM)) |
2613 |
if (!foldOffsetIntoAddress(Val, AM)) |
| 2614 |
return false; |
2614 |
return false; |
| 2615 |
break; |
2615 |
break; |
| 2616 |
} |
2616 |
} |
| 2617 |
case X86ISD::Wrapper: |
2617 |
case X86ISD::Wrapper: |
| 2618 |
if (!matchWrapper(N, AM)) |
2618 |
if (!matchWrapper(N, AM)) |
| 2619 |
return false; |
2619 |
return false; |
| 2620 |
break; |
2620 |
break; |
| 2621 |
case ISD::ADD: { |
2621 |
case ISD::ADD: { |
| 2622 |
// Add an artificial use to this node so that we can keep track of |
2622 |
// Add an artificial use to this node so that we can keep track of |
| 2623 |
// it if it gets CSE'd with a different node. |
2623 |
// it if it gets CSE'd with a different node. |
| 2624 |
HandleSDNode Handle(N); |
2624 |
HandleSDNode Handle(N); |
| 2625 |
|
2625 |
|
| 2626 |
X86ISelAddressMode Backup = AM; |
2626 |
X86ISelAddressMode Backup = AM; |
| 2627 |
if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) && |
2627 |
if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) && |
| 2628 |
!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM, |
2628 |
!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM, |
| 2629 |
Depth + 1)) |
2629 |
Depth + 1)) |
| 2630 |
return false; |
2630 |
return false; |
| 2631 |
AM = Backup; |
2631 |
AM = Backup; |
| 2632 |
|
2632 |
|
| 2633 |
// Try again after commuting the operands. |
2633 |
// Try again after commuting the operands. |
| 2634 |
if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM, |
2634 |
if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM, |
| 2635 |
Depth + 1) && |
2635 |
Depth + 1) && |
| 2636 |
!matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM, |
2636 |
!matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM, |
| 2637 |
Depth + 1)) |
2637 |
Depth + 1)) |
| 2638 |
return false; |
2638 |
return false; |
| 2639 |
AM = Backup; |
2639 |
AM = Backup; |
| 2640 |
|
2640 |
|
| 2641 |
N = Handle.getValue(); |
2641 |
N = Handle.getValue(); |
| 2642 |
break; |
2642 |
break; |
| 2643 |
} |
2643 |
} |
| 2644 |
} |
2644 |
} |
| 2645 |
|
2645 |
|
| 2646 |
return matchAddressBase(N, AM); |
2646 |
return matchAddressBase(N, AM); |
| 2647 |
} |
2647 |
} |
| 2648 |
|
2648 |
|
| 2649 |
/// Helper for selectVectorAddr. Handles things that can be folded into a |
2649 |
/// Helper for selectVectorAddr. Handles things that can be folded into a |
| 2650 |
/// gather/scatter address. The index register and scale should have already |
2650 |
/// gather/scatter address. The index register and scale should have already |
| 2651 |
/// been handled. |
2651 |
/// been handled. |
| 2652 |
bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) { |
2652 |
bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) { |
| 2653 |
return matchVectorAddressRecursively(N, AM, 0); |
2653 |
return matchVectorAddressRecursively(N, AM, 0); |
| 2654 |
} |
2654 |
} |
| 2655 |
|
2655 |
|
| 2656 |
bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, |
2656 |
bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, |
| 2657 |
SDValue IndexOp, SDValue ScaleOp, |
2657 |
SDValue IndexOp, SDValue ScaleOp, |
| 2658 |
SDValue &Base, SDValue &Scale, |
2658 |
SDValue &Base, SDValue &Scale, |
| 2659 |
SDValue &Index, SDValue &Disp, |
2659 |
SDValue &Index, SDValue &Disp, |
| 2660 |
SDValue &Segment) { |
2660 |
SDValue &Segment) { |
| 2661 |
X86ISelAddressMode AM; |
2661 |
X86ISelAddressMode AM; |
| 2662 |
AM.IndexReg = IndexOp; |
2662 |
AM.IndexReg = IndexOp; |
| 2663 |
AM.Scale = cast(ScaleOp)->getZExtValue(); |
2663 |
AM.Scale = cast(ScaleOp)->getZExtValue(); |
| 2664 |
|
2664 |
|
| 2665 |
unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace(); |
2665 |
unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace(); |
| 2666 |
if (AddrSpace == X86AS::GS) |
2666 |
if (AddrSpace == X86AS::GS) |
| 2667 |
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); |
2667 |
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); |
| 2668 |
if (AddrSpace == X86AS::FS) |
2668 |
if (AddrSpace == X86AS::FS) |
| 2669 |
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); |
2669 |
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); |
| 2670 |
if (AddrSpace == X86AS::SS) |
2670 |
if (AddrSpace == X86AS::SS) |
| 2671 |
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); |
2671 |
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); |
| 2672 |
|
2672 |
|
| 2673 |
SDLoc DL(BasePtr); |
2673 |
SDLoc DL(BasePtr); |
| 2674 |
MVT VT = BasePtr.getSimpleValueType(); |
2674 |
MVT VT = BasePtr.getSimpleValueType(); |
| 2675 |
|
2675 |
|
| 2676 |
// Try to match into the base and displacement fields. |
2676 |
// Try to match into the base and displacement fields. |
| 2677 |
if (matchVectorAddress(BasePtr, AM)) |
2677 |
if (matchVectorAddress(BasePtr, AM)) |
| 2678 |
return false; |
2678 |
return false; |
| 2679 |
|
2679 |
|
| 2680 |
getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); |
2680 |
getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); |
| 2681 |
return true; |
2681 |
return true; |
| 2682 |
} |
2682 |
} |
| 2683 |
|
2683 |
|
| 2684 |
/// Returns true if it is able to pattern match an addressing mode. |
2684 |
/// Returns true if it is able to pattern match an addressing mode. |
| 2685 |
/// It returns the operands which make up the maximal addressing mode it can |
2685 |
/// It returns the operands which make up the maximal addressing mode it can |
| 2686 |
/// match by reference. |
2686 |
/// match by reference. |
| 2687 |
/// |
2687 |
/// |
| 2688 |
/// Parent is the parent node of the addr operand that is being matched. It |
2688 |
/// Parent is the parent node of the addr operand that is being matched. It |
| 2689 |
/// is always a load, store, atomic node, or null. It is only null when |
2689 |
/// is always a load, store, atomic node, or null. It is only null when |
| 2690 |
/// checking memory operands for inline asm nodes. |
2690 |
/// checking memory operands for inline asm nodes. |
| 2691 |
bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, |
2691 |
bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, |
| 2692 |
SDValue &Scale, SDValue &Index, |
2692 |
SDValue &Scale, SDValue &Index, |
| 2693 |
SDValue &Disp, SDValue &Segment) { |
2693 |
SDValue &Disp, SDValue &Segment) { |
| 2694 |
X86ISelAddressMode AM; |
2694 |
X86ISelAddressMode AM; |
| 2695 |
|
2695 |
|
| 2696 |
if (Parent && |
2696 |
if (Parent && |
| 2697 |
// This list of opcodes are all the nodes that have an "addr:$ptr" operand |
2697 |
// This list of opcodes are all the nodes that have an "addr:$ptr" operand |
| 2698 |
// that are not a MemSDNode, and thus don't have proper addrspace info. |
2698 |
// that are not a MemSDNode, and thus don't have proper addrspace info. |
| 2699 |
Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme |
2699 |
Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme |
| 2700 |
Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores |
2700 |
Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores |
| 2701 |
Parent->getOpcode() != X86ISD::TLSCALL && // Fixme |
2701 |
Parent->getOpcode() != X86ISD::TLSCALL && // Fixme |
| 2702 |
Parent->getOpcode() != X86ISD::ENQCMD && // Fixme |
2702 |
Parent->getOpcode() != X86ISD::ENQCMD && // Fixme |
| 2703 |
Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme |
2703 |
Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme |
| 2704 |
Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp |
2704 |
Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp |
| 2705 |
Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp |
2705 |
Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp |
| 2706 |
unsigned AddrSpace = |
2706 |
unsigned AddrSpace = |
| 2707 |
cast(Parent)->getPointerInfo().getAddrSpace(); |
2707 |
cast(Parent)->getPointerInfo().getAddrSpace(); |
| 2708 |
if (AddrSpace == X86AS::GS) |
2708 |
if (AddrSpace == X86AS::GS) |
| 2709 |
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); |
2709 |
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); |
| 2710 |
if (AddrSpace == X86AS::FS) |
2710 |
if (AddrSpace == X86AS::FS) |
| 2711 |
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); |
2711 |
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); |
| 2712 |
if (AddrSpace == X86AS::SS) |
2712 |
if (AddrSpace == X86AS::SS) |
| 2713 |
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); |
2713 |
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); |
| 2714 |
} |
2714 |
} |
| 2715 |
|
2715 |
|
| 2716 |
// Save the DL and VT before calling matchAddress, it can invalidate N. |
2716 |
// Save the DL and VT before calling matchAddress, it can invalidate N. |
| 2717 |
SDLoc DL(N); |
2717 |
SDLoc DL(N); |
| 2718 |
MVT VT = N.getSimpleValueType(); |
2718 |
MVT VT = N.getSimpleValueType(); |
| 2719 |
|
2719 |
|
| 2720 |
if (matchAddress(N, AM)) |
2720 |
if (matchAddress(N, AM)) |
| 2721 |
return false; |
2721 |
return false; |
| 2722 |
|
2722 |
|
| 2723 |
getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); |
2723 |
getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); |
| 2724 |
return true; |
2724 |
return true; |
| 2725 |
} |
2725 |
} |
| 2726 |
|
2726 |
|
| 2727 |
bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { |
2727 |
bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { |
| 2728 |
// In static codegen with small code model, we can get the address of a label |
2728 |
// In static codegen with small code model, we can get the address of a label |
| 2729 |
// into a register with 'movl' |
2729 |
// into a register with 'movl' |
| 2730 |
if (N->getOpcode() != X86ISD::Wrapper) |
2730 |
if (N->getOpcode() != X86ISD::Wrapper) |
| 2731 |
return false; |
2731 |
return false; |
| 2732 |
|
2732 |
|
| 2733 |
N = N.getOperand(0); |
2733 |
N = N.getOperand(0); |
| 2734 |
|
2734 |
|
| 2735 |
// At least GNU as does not accept 'movl' for TPOFF relocations. |
2735 |
// At least GNU as does not accept 'movl' for TPOFF relocations. |
| 2736 |
// FIXME: We could use 'movl' when we know we are targeting MC. |
2736 |
// FIXME: We could use 'movl' when we know we are targeting MC. |
| 2737 |
if (N->getOpcode() == ISD::TargetGlobalTLSAddress) |
2737 |
if (N->getOpcode() == ISD::TargetGlobalTLSAddress) |
| 2738 |
return false; |
2738 |
return false; |
| 2739 |
|
2739 |
|
| 2740 |
Imm = N; |
2740 |
Imm = N; |
| 2741 |
if (N->getOpcode() != ISD::TargetGlobalAddress) |
2741 |
if (N->getOpcode() != ISD::TargetGlobalAddress) |
| 2742 |
return TM.getCodeModel() == CodeModel::Small; |
2742 |
return TM.getCodeModel() == CodeModel::Small; |
| 2743 |
|
2743 |
|
| 2744 |
std::optional CR = |
2744 |
std::optional CR = |
| 2745 |
cast(N)->getGlobal()->getAbsoluteSymbolRange(); |
2745 |
cast(N)->getGlobal()->getAbsoluteSymbolRange(); |
| 2746 |
if (!CR) |
2746 |
if (!CR) |
| 2747 |
return TM.getCodeModel() == CodeModel::Small; |
2747 |
return TM.getCodeModel() == CodeModel::Small; |
| 2748 |
|
2748 |
|
| 2749 |
return CR->getUnsignedMax().ult(1ull << 32); |
2749 |
return CR->getUnsignedMax().ult(1ull << 32); |
| 2750 |
} |
2750 |
} |
| 2751 |
|
2751 |
|
| 2752 |
bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, |
2752 |
bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, |
| 2753 |
SDValue &Scale, SDValue &Index, |
2753 |
SDValue &Scale, SDValue &Index, |
| 2754 |
SDValue &Disp, SDValue &Segment) { |
2754 |
SDValue &Disp, SDValue &Segment) { |
| 2755 |
// Save the debug loc before calling selectLEAAddr, in case it invalidates N. |
2755 |
// Save the debug loc before calling selectLEAAddr, in case it invalidates N. |
| 2756 |
SDLoc DL(N); |
2756 |
SDLoc DL(N); |
| 2757 |
|
2757 |
|
| 2758 |
if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) |
2758 |
if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) |
| 2759 |
return false; |
2759 |
return false; |
| 2760 |
|
2760 |
|
| 2761 |
auto *RN = dyn_cast(Base); |
2761 |
auto *RN = dyn_cast(Base); |
| 2762 |
if (RN && RN->getReg() == 0) |
2762 |
if (RN && RN->getReg() == 0) |
| 2763 |
Base = CurDAG->getRegister(0, MVT::i64); |
2763 |
Base = CurDAG->getRegister(0, MVT::i64); |
| 2764 |
else if (Base.getValueType() == MVT::i32 && !isa(Base)) { |
2764 |
else if (Base.getValueType() == MVT::i32 && !isa(Base)) { |
| 2765 |
// Base could already be %rip, particularly in the x32 ABI. |
2765 |
// Base could already be %rip, particularly in the x32 ABI. |
| 2766 |
SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, |
2766 |
SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, |
| 2767 |
MVT::i64), 0); |
2767 |
MVT::i64), 0); |
| 2768 |
Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, |
2768 |
Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, |
| 2769 |
Base); |
2769 |
Base); |
| 2770 |
} |
2770 |
} |
| 2771 |
|
2771 |
|
| 2772 |
RN = dyn_cast(Index); |
2772 |
RN = dyn_cast(Index); |
| 2773 |
if (RN && RN->getReg() == 0) |
2773 |
if (RN && RN->getReg() == 0) |
| 2774 |
Index = CurDAG->getRegister(0, MVT::i64); |
2774 |
Index = CurDAG->getRegister(0, MVT::i64); |
| 2775 |
else { |
2775 |
else { |
| 2776 |
assert(Index.getValueType() == MVT::i32 && |
2776 |
assert(Index.getValueType() == MVT::i32 && |
| 2777 |
"Expect to be extending 32-bit registers for use in LEA"); |
2777 |
"Expect to be extending 32-bit registers for use in LEA"); |
| 2778 |
SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, |
2778 |
SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, |
| 2779 |
MVT::i64), 0); |
2779 |
MVT::i64), 0); |
| 2780 |
Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, |
2780 |
Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, |
| 2781 |
Index); |
2781 |
Index); |
| 2782 |
} |
2782 |
} |
| 2783 |
|
2783 |
|
| 2784 |
return true; |
2784 |
return true; |
| 2785 |
} |
2785 |
} |
| 2786 |
|
2786 |
|
| 2787 |
/// Calls SelectAddr and determines if the maximal addressing |
2787 |
/// Calls SelectAddr and determines if the maximal addressing |
| 2788 |
/// mode it matches can be cost effectively emitted as an LEA instruction. |
2788 |
/// mode it matches can be cost effectively emitted as an LEA instruction. |
| 2789 |
bool X86DAGToDAGISel::selectLEAAddr(SDValue N, |
2789 |
bool X86DAGToDAGISel::selectLEAAddr(SDValue N, |
| 2790 |
SDValue &Base, SDValue &Scale, |
2790 |
SDValue &Base, SDValue &Scale, |
| 2791 |
SDValue &Index, SDValue &Disp, |
2791 |
SDValue &Index, SDValue &Disp, |
| 2792 |
SDValue &Segment) { |
2792 |
SDValue &Segment) { |
| 2793 |
X86ISelAddressMode AM; |
2793 |
X86ISelAddressMode AM; |
| 2794 |
|
2794 |
|
| 2795 |
// Save the DL and VT before calling matchAddress, it can invalidate N. |
2795 |
// Save the DL and VT before calling matchAddress, it can invalidate N. |
| 2796 |
SDLoc DL(N); |
2796 |
SDLoc DL(N); |
| 2797 |
MVT VT = N.getSimpleValueType(); |
2797 |
MVT VT = N.getSimpleValueType(); |
| 2798 |
|
2798 |
|
| 2799 |
// Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support |
2799 |
// Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support |
| 2800 |
// segments. |
2800 |
// segments. |
| 2801 |
SDValue Copy = AM.Segment; |
2801 |
SDValue Copy = AM.Segment; |
| 2802 |
SDValue T = CurDAG->getRegister(0, MVT::i32); |
2802 |
SDValue T = CurDAG->getRegister(0, MVT::i32); |
| 2803 |
AM.Segment = T; |
2803 |
AM.Segment = T; |
| 2804 |
if (matchAddress(N, AM)) |
2804 |
if (matchAddress(N, AM)) |
| 2805 |
return false; |
2805 |
return false; |
| 2806 |
assert (T == AM.Segment); |
2806 |
assert (T == AM.Segment); |
| 2807 |
AM.Segment = Copy; |
2807 |
AM.Segment = Copy; |
| 2808 |
|
2808 |
|
| 2809 |
unsigned Complexity = 0; |
2809 |
unsigned Complexity = 0; |
| 2810 |
if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode()) |
2810 |
if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode()) |
| 2811 |
Complexity = 1; |
2811 |
Complexity = 1; |
| 2812 |
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) |
2812 |
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) |
| 2813 |
Complexity = 4; |
2813 |
Complexity = 4; |
| 2814 |
|
2814 |
|
| 2815 |
if (AM.IndexReg.getNode()) |
2815 |
if (AM.IndexReg.getNode()) |
| 2816 |
Complexity++; |
2816 |
Complexity++; |
| 2817 |
|
2817 |
|
| 2818 |
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with |
2818 |
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with |
| 2819 |
// a simple shift. |
2819 |
// a simple shift. |
| 2820 |
if (AM.Scale > 1) |
2820 |
if (AM.Scale > 1) |
| 2821 |
Complexity++; |
2821 |
Complexity++; |
| 2822 |
|
2822 |
|
| 2823 |
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA |
2823 |
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA |
| 2824 |
// to a LEA. This is determined with some experimentation but is by no means |
2824 |
// to a LEA. This is determined with some experimentation but is by no means |
| 2825 |
// optimal (especially for code size consideration). LEA is nice because of |
2825 |
// optimal (especially for code size consideration). LEA is nice because of |
| 2826 |
// its three-address nature. Tweak the cost function again when we can run |
2826 |
// its three-address nature. Tweak the cost function again when we can run |
| 2827 |
// convertToThreeAddress() at register allocation time. |
2827 |
// convertToThreeAddress() at register allocation time. |
| 2828 |
if (AM.hasSymbolicDisplacement()) { |
2828 |
if (AM.hasSymbolicDisplacement()) { |
| 2829 |
// For X86-64, always use LEA to materialize RIP-relative addresses. |
2829 |
// For X86-64, always use LEA to materialize RIP-relative addresses. |
| 2830 |
if (Subtarget->is64Bit()) |
2830 |
if (Subtarget->is64Bit()) |
| 2831 |
Complexity = 4; |
2831 |
Complexity = 4; |
| 2832 |
else |
2832 |
else |
| 2833 |
Complexity += 2; |
2833 |
Complexity += 2; |
| 2834 |
} |
2834 |
} |
| 2835 |
|
2835 |
|
| 2836 |
// Heuristic: try harder to form an LEA from ADD if the operands set flags. |
2836 |
// Heuristic: try harder to form an LEA from ADD if the operands set flags. |
| 2837 |
// Unlike ADD, LEA does not affect flags, so we will be less likely to require |
2837 |
// Unlike ADD, LEA does not affect flags, so we will be less likely to require |
| 2838 |
// duplicating flag-producing instructions later in the pipeline. |
2838 |
// duplicating flag-producing instructions later in the pipeline. |
| 2839 |
if (N.getOpcode() == ISD::ADD) { |
2839 |
if (N.getOpcode() == ISD::ADD) { |
| 2840 |
auto isMathWithFlags = [](SDValue V) { |
2840 |
auto isMathWithFlags = [](SDValue V) { |
| 2841 |
switch (V.getOpcode()) { |
2841 |
switch (V.getOpcode()) { |
| 2842 |
case X86ISD::ADD: |
2842 |
case X86ISD::ADD: |
| 2843 |
case X86ISD::SUB: |
2843 |
case X86ISD::SUB: |
| 2844 |
case X86ISD::ADC: |
2844 |
case X86ISD::ADC: |
| 2845 |
case X86ISD::SBB: |
2845 |
case X86ISD::SBB: |
| 2846 |
case X86ISD::SMUL: |
2846 |
case X86ISD::SMUL: |
| 2847 |
case X86ISD::UMUL: |
2847 |
case X86ISD::UMUL: |
| 2848 |
/* TODO: These opcodes can be added safely, but we may want to justify |
2848 |
/* TODO: These opcodes can be added safely, but we may want to justify |
| 2849 |
their inclusion for different reasons (better for reg-alloc). |
2849 |
their inclusion for different reasons (better for reg-alloc). |
| 2850 |
case X86ISD::OR: |
2850 |
case X86ISD::OR: |
| 2851 |
case X86ISD::XOR: |
2851 |
case X86ISD::XOR: |
| 2852 |
case X86ISD::AND: |
2852 |
case X86ISD::AND: |
| 2853 |
*/ |
2853 |
*/ |
| 2854 |
// Value 1 is the flag output of the node - verify it's not dead. |
2854 |
// Value 1 is the flag output of the node - verify it's not dead. |
| 2855 |
return !SDValue(V.getNode(), 1).use_empty(); |
2855 |
return !SDValue(V.getNode(), 1).use_empty(); |
| 2856 |
default: |
2856 |
default: |
| 2857 |
return false; |
2857 |
return false; |
| 2858 |
} |
2858 |
} |
| 2859 |
}; |
2859 |
}; |
| 2860 |
// TODO: We might want to factor in whether there's a load folding |
2860 |
// TODO: We might want to factor in whether there's a load folding |
| 2861 |
// opportunity for the math op that disappears with LEA. |
2861 |
// opportunity for the math op that disappears with LEA. |
| 2862 |
if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1))) |
2862 |
if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1))) |
| 2863 |
Complexity++; |
2863 |
Complexity++; |
| 2864 |
} |
2864 |
} |
| 2865 |
|
2865 |
|
| 2866 |
if (AM.Disp) |
2866 |
if (AM.Disp) |
| 2867 |
Complexity++; |
2867 |
Complexity++; |
| 2868 |
|
2868 |
|
| 2869 |
// If it isn't worth using an LEA, reject it. |
2869 |
// If it isn't worth using an LEA, reject it. |
| 2870 |
if (Complexity <= 2) |
2870 |
if (Complexity <= 2) |
| 2871 |
return false; |
2871 |
return false; |
| 2872 |
|
2872 |
|
| 2873 |
getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); |
2873 |
getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); |
| 2874 |
return true; |
2874 |
return true; |
| 2875 |
} |
2875 |
} |
| 2876 |
|
2876 |
|
| 2877 |
/// This is only run on TargetGlobalTLSAddress nodes. |
2877 |
/// This is only run on TargetGlobalTLSAddress nodes. |
| 2878 |
bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, |
2878 |
bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, |
| 2879 |
SDValue &Scale, SDValue &Index, |
2879 |
SDValue &Scale, SDValue &Index, |
| 2880 |
SDValue &Disp, SDValue &Segment) { |
2880 |
SDValue &Disp, SDValue &Segment) { |
| 2881 |
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); |
2881 |
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); |
| 2882 |
auto *GA = cast(N); |
2882 |
auto *GA = cast(N); |
| 2883 |
|
2883 |
|
| 2884 |
X86ISelAddressMode AM; |
2884 |
X86ISelAddressMode AM; |
| 2885 |
AM.GV = GA->getGlobal(); |
2885 |
AM.GV = GA->getGlobal(); |
| 2886 |
AM.Disp += GA->getOffset(); |
2886 |
AM.Disp += GA->getOffset(); |
| 2887 |
AM.SymbolFlags = GA->getTargetFlags(); |
2887 |
AM.SymbolFlags = GA->getTargetFlags(); |
| 2888 |
|
2888 |
|
| 2889 |
if (Subtarget->is32Bit()) { |
2889 |
if (Subtarget->is32Bit()) { |
| 2890 |
AM.Scale = 1; |
2890 |
AM.Scale = 1; |
| 2891 |
AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); |
2891 |
AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); |
| 2892 |
} |
2892 |
} |
| 2893 |
|
2893 |
|
| 2894 |
MVT VT = N.getSimpleValueType(); |
2894 |
MVT VT = N.getSimpleValueType(); |
| 2895 |
getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment); |
2895 |
getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment); |
| 2896 |
return true; |
2896 |
return true; |
| 2897 |
} |
2897 |
} |
| 2898 |
|
2898 |
|
| 2899 |
bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) { |
2899 |
bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) { |
| 2900 |
// Keep track of the original value type and whether this value was |
2900 |
// Keep track of the original value type and whether this value was |
| 2901 |
// truncated. If we see a truncation from pointer type to VT that truncates |
2901 |
// truncated. If we see a truncation from pointer type to VT that truncates |
| 2902 |
// bits that are known to be zero, we can use a narrow reference. |
2902 |
// bits that are known to be zero, we can use a narrow reference. |
| 2903 |
EVT VT = N.getValueType(); |
2903 |
EVT VT = N.getValueType(); |
| 2904 |
bool WasTruncated = false; |
2904 |
bool WasTruncated = false; |
| 2905 |
if (N.getOpcode() == ISD::TRUNCATE) { |
2905 |
if (N.getOpcode() == ISD::TRUNCATE) { |
| 2906 |
WasTruncated = true; |
2906 |
WasTruncated = true; |
| 2907 |
N = N.getOperand(0); |
2907 |
N = N.getOperand(0); |
| 2908 |
} |
2908 |
} |
| 2909 |
|
2909 |
|
| 2910 |
if (N.getOpcode() != X86ISD::Wrapper) |
2910 |
if (N.getOpcode() != X86ISD::Wrapper) |
| 2911 |
return false; |
2911 |
return false; |
| 2912 |
|
2912 |
|
| 2913 |
// We can only use non-GlobalValues as immediates if they were not truncated, |
2913 |
// We can only use non-GlobalValues as immediates if they were not truncated, |
| 2914 |
// as we do not have any range information. If we have a GlobalValue and the |
2914 |
// as we do not have any range information. If we have a GlobalValue and the |
| 2915 |
// address was not truncated, we can select it as an operand directly. |
2915 |
// address was not truncated, we can select it as an operand directly. |
| 2916 |
unsigned Opc = N.getOperand(0)->getOpcode(); |
2916 |
unsigned Opc = N.getOperand(0)->getOpcode(); |
| 2917 |
if (Opc != ISD::TargetGlobalAddress || !WasTruncated) { |
2917 |
if (Opc != ISD::TargetGlobalAddress || !WasTruncated) { |
| 2918 |
Op = N.getOperand(0); |
2918 |
Op = N.getOperand(0); |
| 2919 |
// We can only select the operand directly if we didn't have to look past a |
2919 |
// We can only select the operand directly if we didn't have to look past a |
| 2920 |
// truncate. |
2920 |
// truncate. |
| 2921 |
return !WasTruncated; |
2921 |
return !WasTruncated; |
| 2922 |
} |
2922 |
} |
| 2923 |
|
2923 |
|
| 2924 |
// Check that the global's range fits into VT. |
2924 |
// Check that the global's range fits into VT. |
| 2925 |
auto *GA = cast(N.getOperand(0)); |
2925 |
auto *GA = cast(N.getOperand(0)); |
| 2926 |
std::optional CR = GA->getGlobal()->getAbsoluteSymbolRange(); |
2926 |
std::optional CR = GA->getGlobal()->getAbsoluteSymbolRange(); |
| 2927 |
if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits())) |
2927 |
if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits())) |
| 2928 |
return false; |
2928 |
return false; |
| 2929 |
|
2929 |
|
| 2930 |
// Okay, we can use a narrow reference. |
2930 |
// Okay, we can use a narrow reference. |
| 2931 |
Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT, |
2931 |
Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT, |
| 2932 |
GA->getOffset(), GA->getTargetFlags()); |
2932 |
GA->getOffset(), GA->getTargetFlags()); |
| 2933 |
return true; |
2933 |
return true; |
| 2934 |
} |
2934 |
} |
| 2935 |
|
2935 |
|
| 2936 |
bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, |
2936 |
bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, |
| 2937 |
SDValue &Base, SDValue &Scale, |
2937 |
SDValue &Base, SDValue &Scale, |
| 2938 |
SDValue &Index, SDValue &Disp, |
2938 |
SDValue &Index, SDValue &Disp, |
| 2939 |
SDValue &Segment) { |
2939 |
SDValue &Segment) { |
| 2940 |
assert(Root && P && "Unknown root/parent nodes"); |
2940 |
assert(Root && P && "Unknown root/parent nodes"); |
| 2941 |
if (!ISD::isNON_EXTLoad(N.getNode()) || |
2941 |
if (!ISD::isNON_EXTLoad(N.getNode()) || |
| 2942 |
!IsProfitableToFold(N, P, Root) || |
2942 |
!IsProfitableToFold(N, P, Root) || |
| 2943 |
!IsLegalToFold(N, P, Root, OptLevel)) |
2943 |
!IsLegalToFold(N, P, Root, OptLevel)) |
| 2944 |
return false; |
2944 |
return false; |
| 2945 |
|
2945 |
|
| 2946 |
return selectAddr(N.getNode(), |
2946 |
return selectAddr(N.getNode(), |
| 2947 |
N.getOperand(1), Base, Scale, Index, Disp, Segment); |
2947 |
N.getOperand(1), Base, Scale, Index, Disp, Segment); |
| 2948 |
} |
2948 |
} |
| 2949 |
|
2949 |
|
| 2950 |
bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, |
2950 |
bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, |
| 2951 |
SDValue &Base, SDValue &Scale, |
2951 |
SDValue &Base, SDValue &Scale, |
| 2952 |
SDValue &Index, SDValue &Disp, |
2952 |
SDValue &Index, SDValue &Disp, |
| 2953 |
SDValue &Segment) { |
2953 |
SDValue &Segment) { |
| 2954 |
assert(Root && P && "Unknown root/parent nodes"); |
2954 |
assert(Root && P && "Unknown root/parent nodes"); |
| 2955 |
if (N->getOpcode() != X86ISD::VBROADCAST_LOAD || |
2955 |
if (N->getOpcode() != X86ISD::VBROADCAST_LOAD || |
| 2956 |
!IsProfitableToFold(N, P, Root) || |
2956 |
!IsProfitableToFold(N, P, Root) || |
| 2957 |
!IsLegalToFold(N, P, Root, OptLevel)) |
2957 |
!IsLegalToFold(N, P, Root, OptLevel)) |
| 2958 |
return false; |
2958 |
return false; |
| 2959 |
|
2959 |
|
| 2960 |
return selectAddr(N.getNode(), |
2960 |
return selectAddr(N.getNode(), |
| 2961 |
N.getOperand(1), Base, Scale, Index, Disp, Segment); |
2961 |
N.getOperand(1), Base, Scale, Index, Disp, Segment); |
| 2962 |
} |
2962 |
} |
| 2963 |
|
2963 |
|
| 2964 |
/// Return an SDNode that returns the value of the global base register. |
2964 |
/// Return an SDNode that returns the value of the global base register. |
| 2965 |
/// Output instructions required to initialize the global base register, |
2965 |
/// Output instructions required to initialize the global base register, |
| 2966 |
/// if necessary. |
2966 |
/// if necessary. |
| 2967 |
SDNode *X86DAGToDAGISel::getGlobalBaseReg() { |
2967 |
SDNode *X86DAGToDAGISel::getGlobalBaseReg() { |
| 2968 |
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); |
2968 |
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); |
| 2969 |
auto &DL = MF->getDataLayout(); |
2969 |
auto &DL = MF->getDataLayout(); |
| 2970 |
return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode(); |
2970 |
return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode(); |
| 2971 |
} |
2971 |
} |
| 2972 |
|
2972 |
|
| 2973 |
bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const { |
2973 |
bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const { |
| 2974 |
if (N->getOpcode() == ISD::TRUNCATE) |
2974 |
if (N->getOpcode() == ISD::TRUNCATE) |
| 2975 |
N = N->getOperand(0).getNode(); |
2975 |
N = N->getOperand(0).getNode(); |
| 2976 |
if (N->getOpcode() != X86ISD::Wrapper) |
2976 |
if (N->getOpcode() != X86ISD::Wrapper) |
| 2977 |
return false; |
2977 |
return false; |
| 2978 |
|
2978 |
|
| 2979 |
auto *GA = dyn_cast(N->getOperand(0)); |
2979 |
auto *GA = dyn_cast(N->getOperand(0)); |
| 2980 |
if (!GA) |
2980 |
if (!GA) |
| 2981 |
return false; |
2981 |
return false; |
| 2982 |
|
2982 |
|
| 2983 |
std::optional CR = GA->getGlobal()->getAbsoluteSymbolRange(); |
2983 |
std::optional CR = GA->getGlobal()->getAbsoluteSymbolRange(); |
| 2984 |
if (!CR) |
2984 |
if (!CR) |
| 2985 |
return Width == 32 && TM.getCodeModel() == CodeModel::Small; |
2985 |
return Width == 32 && TM.getCodeModel() == CodeModel::Small; |
| 2986 |
|
2986 |
|
| 2987 |
return CR->getSignedMin().sge(-1ull << Width) && |
2987 |
return CR->getSignedMin().sge(-1ull << Width) && |
| 2988 |
CR->getSignedMax().slt(1ull << Width); |
2988 |
CR->getSignedMax().slt(1ull << Width); |
| 2989 |
} |
2989 |
} |
| 2990 |
|
2990 |
|
| 2991 |
X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const { |
2991 |
X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const { |
| 2992 |
assert(N->isMachineOpcode() && "Unexpected node"); |
2992 |
assert(N->isMachineOpcode() && "Unexpected node"); |
| 2993 |
unsigned Opc = N->getMachineOpcode(); |
2993 |
unsigned Opc = N->getMachineOpcode(); |
| 2994 |
const MCInstrDesc &MCID = getInstrInfo()->get(Opc); |
2994 |
const MCInstrDesc &MCID = getInstrInfo()->get(Opc); |
| 2995 |
int CondNo = X86::getCondSrcNoFromDesc(MCID); |
2995 |
int CondNo = X86::getCondSrcNoFromDesc(MCID); |
| 2996 |
if (CondNo < 0) |
2996 |
if (CondNo < 0) |
| 2997 |
return X86::COND_INVALID; |
2997 |
return X86::COND_INVALID; |
| 2998 |
|
2998 |
|
| 2999 |
return static_cast(N->getConstantOperandVal(CondNo)); |
2999 |
return static_cast(N->getConstantOperandVal(CondNo)); |
| 3000 |
} |
3000 |
} |
| 3001 |
|
3001 |
|
| 3002 |
/// Test whether the given X86ISD::CMP node has any users that use a flag |
3002 |
/// Test whether the given X86ISD::CMP node has any users that use a flag |
| 3003 |
/// other than ZF. |
3003 |
/// other than ZF. |
| 3004 |
bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const { |
3004 |
bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const { |
| 3005 |
// Examine each user of the node. |
3005 |
// Examine each user of the node. |
| 3006 |
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
3006 |
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
| 3007 |
UI != UE; ++UI) { |
3007 |
UI != UE; ++UI) { |
| 3008 |
// Only check things that use the flags. |
3008 |
// Only check things that use the flags. |
| 3009 |
if (UI.getUse().getResNo() != Flags.getResNo()) |
3009 |
if (UI.getUse().getResNo() != Flags.getResNo()) |
| 3010 |
continue; |
3010 |
continue; |
| 3011 |
// Only examine CopyToReg uses that copy to EFLAGS. |
3011 |
// Only examine CopyToReg uses that copy to EFLAGS. |
| 3012 |
if (UI->getOpcode() != ISD::CopyToReg || |
3012 |
if (UI->getOpcode() != ISD::CopyToReg || |
| 3013 |
cast(UI->getOperand(1))->getReg() != X86::EFLAGS) |
3013 |
cast(UI->getOperand(1))->getReg() != X86::EFLAGS) |
| 3014 |
return false; |
3014 |
return false; |
| 3015 |
// Examine each user of the CopyToReg use. |
3015 |
// Examine each user of the CopyToReg use. |
| 3016 |
for (SDNode::use_iterator FlagUI = UI->use_begin(), |
3016 |
for (SDNode::use_iterator FlagUI = UI->use_begin(), |
| 3017 |
FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { |
3017 |
FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { |
| 3018 |
// Only examine the Flag result. |
3018 |
// Only examine the Flag result. |
| 3019 |
if (FlagUI.getUse().getResNo() != 1) continue; |
3019 |
if (FlagUI.getUse().getResNo() != 1) continue; |
| 3020 |
// Anything unusual: assume conservatively. |
3020 |
// Anything unusual: assume conservatively. |
| 3021 |
if (!FlagUI->isMachineOpcode()) return false; |
3021 |
if (!FlagUI->isMachineOpcode()) return false; |
| 3022 |
// Examine the condition code of the user. |
3022 |
// Examine the condition code of the user. |
| 3023 |
X86::CondCode CC = getCondFromNode(*FlagUI); |
3023 |
X86::CondCode CC = getCondFromNode(*FlagUI); |
| 3024 |
|
3024 |
|
| 3025 |
switch (CC) { |
3025 |
switch (CC) { |
| 3026 |
// Comparisons which only use the zero flag. |
3026 |
// Comparisons which only use the zero flag. |
| 3027 |
case X86::COND_E: case X86::COND_NE: |
3027 |
case X86::COND_E: case X86::COND_NE: |
| 3028 |
continue; |
3028 |
continue; |
| 3029 |
// Anything else: assume conservatively. |
3029 |
// Anything else: assume conservatively. |
| 3030 |
default: |
3030 |
default: |
| 3031 |
return false; |
3031 |
return false; |
| 3032 |
} |
3032 |
} |
| 3033 |
} |
3033 |
} |
| 3034 |
} |
3034 |
} |
| 3035 |
return true; |
3035 |
return true; |
| 3036 |
} |
3036 |
} |
| 3037 |
|
3037 |
|
| 3038 |
/// Test whether the given X86ISD::CMP node has any uses which require the SF |
3038 |
/// Test whether the given X86ISD::CMP node has any uses which require the SF |
| 3039 |
/// flag to be accurate. |
3039 |
/// flag to be accurate. |
| 3040 |
bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const { |
3040 |
bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const { |
| 3041 |
// Examine each user of the node. |
3041 |
// Examine each user of the node. |
| 3042 |
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
3042 |
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
| 3043 |
UI != UE; ++UI) { |
3043 |
UI != UE; ++UI) { |
| 3044 |
// Only check things that use the flags. |
3044 |
// Only check things that use the flags. |
| 3045 |
if (UI.getUse().getResNo() != Flags.getResNo()) |
3045 |
if (UI.getUse().getResNo() != Flags.getResNo()) |
| 3046 |
continue; |
3046 |
continue; |
| 3047 |
// Only examine CopyToReg uses that copy to EFLAGS. |
3047 |
// Only examine CopyToReg uses that copy to EFLAGS. |
| 3048 |
if (UI->getOpcode() != ISD::CopyToReg || |
3048 |
if (UI->getOpcode() != ISD::CopyToReg || |
| 3049 |
cast(UI->getOperand(1))->getReg() != X86::EFLAGS) |
3049 |
cast(UI->getOperand(1))->getReg() != X86::EFLAGS) |
| 3050 |
return false; |
3050 |
return false; |
| 3051 |
// Examine each user of the CopyToReg use. |
3051 |
// Examine each user of the CopyToReg use. |
| 3052 |
for (SDNode::use_iterator FlagUI = UI->use_begin(), |
3052 |
for (SDNode::use_iterator FlagUI = UI->use_begin(), |
| 3053 |
FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { |
3053 |
FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { |
| 3054 |
// Only examine the Flag result. |
3054 |
// Only examine the Flag result. |
| 3055 |
if (FlagUI.getUse().getResNo() != 1) continue; |
3055 |
if (FlagUI.getUse().getResNo() != 1) continue; |
| 3056 |
// Anything unusual: assume conservatively. |
3056 |
// Anything unusual: assume conservatively. |
| 3057 |
if (!FlagUI->isMachineOpcode()) return false; |
3057 |
if (!FlagUI->isMachineOpcode()) return false; |
| 3058 |
// Examine the condition code of the user. |
3058 |
// Examine the condition code of the user. |
| 3059 |
X86::CondCode CC = getCondFromNode(*FlagUI); |
3059 |
X86::CondCode CC = getCondFromNode(*FlagUI); |
| 3060 |
|
3060 |
|
| 3061 |
switch (CC) { |
3061 |
switch (CC) { |
| 3062 |
// Comparisons which don't examine the SF flag. |
3062 |
// Comparisons which don't examine the SF flag. |
| 3063 |
case X86::COND_A: case X86::COND_AE: |
3063 |
case X86::COND_A: case X86::COND_AE: |
| 3064 |
case X86::COND_B: case X86::COND_BE: |
3064 |
case X86::COND_B: case X86::COND_BE: |
| 3065 |
case X86::COND_E: case X86::COND_NE: |
3065 |
case X86::COND_E: case X86::COND_NE: |
| 3066 |
case X86::COND_O: case X86::COND_NO: |
3066 |
case X86::COND_O: case X86::COND_NO: |
| 3067 |
case X86::COND_P: case X86::COND_NP: |
3067 |
case X86::COND_P: case X86::COND_NP: |
| 3068 |
continue; |
3068 |
continue; |
| 3069 |
// Anything else: assume conservatively. |
3069 |
// Anything else: assume conservatively. |
| 3070 |
default: |
3070 |
default: |
| 3071 |
return false; |
3071 |
return false; |
| 3072 |
} |
3072 |
} |
| 3073 |
} |
3073 |
} |
| 3074 |
} |
3074 |
} |
| 3075 |
return true; |
3075 |
return true; |
| 3076 |
} |
3076 |
} |
| 3077 |
|
3077 |
|
| 3078 |
static bool mayUseCarryFlag(X86::CondCode CC) { |
3078 |
static bool mayUseCarryFlag(X86::CondCode CC) { |
| 3079 |
switch (CC) { |
3079 |
switch (CC) { |
| 3080 |
// Comparisons which don't examine the CF flag. |
3080 |
// Comparisons which don't examine the CF flag. |
| 3081 |
case X86::COND_O: case X86::COND_NO: |
3081 |
case X86::COND_O: case X86::COND_NO: |
| 3082 |
case X86::COND_E: case X86::COND_NE: |
3082 |
case X86::COND_E: case X86::COND_NE: |
| 3083 |
case X86::COND_S: case X86::COND_NS: |
3083 |
case X86::COND_S: case X86::COND_NS: |
| 3084 |
case X86::COND_P: case X86::COND_NP: |
3084 |
case X86::COND_P: case X86::COND_NP: |
| 3085 |
case X86::COND_L: case X86::COND_GE: |
3085 |
case X86::COND_L: case X86::COND_GE: |
| 3086 |
case X86::COND_G: case X86::COND_LE: |
3086 |
case X86::COND_G: case X86::COND_LE: |
| 3087 |
return false; |
3087 |
return false; |
| 3088 |
// Anything else: assume conservatively. |
3088 |
// Anything else: assume conservatively. |
| 3089 |
default: |
3089 |
default: |
| 3090 |
return true; |
3090 |
return true; |
| 3091 |
} |
3091 |
} |
| 3092 |
} |
3092 |
} |
| 3093 |
|
3093 |
|
| 3094 |
/// Test whether the given node which sets flags has any uses which require the |
3094 |
/// Test whether the given node which sets flags has any uses which require the |
| 3095 |
/// CF flag to be accurate. |
3095 |
/// CF flag to be accurate. |
| 3096 |
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const { |
3096 |
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const { |
| 3097 |
// Examine each user of the node. |
3097 |
// Examine each user of the node. |
| 3098 |
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
3098 |
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
| 3099 |
UI != UE; ++UI) { |
3099 |
UI != UE; ++UI) { |
| 3100 |
// Only check things that use the flags. |
3100 |
// Only check things that use the flags. |
| 3101 |
if (UI.getUse().getResNo() != Flags.getResNo()) |
3101 |
if (UI.getUse().getResNo() != Flags.getResNo()) |
| 3102 |
continue; |
3102 |
continue; |
| 3103 |
|
3103 |
|
| 3104 |
unsigned UIOpc = UI->getOpcode(); |
3104 |
unsigned UIOpc = UI->getOpcode(); |
| 3105 |
|
3105 |
|
| 3106 |
if (UIOpc == ISD::CopyToReg) { |
3106 |
if (UIOpc == ISD::CopyToReg) { |
| 3107 |
// Only examine CopyToReg uses that copy to EFLAGS. |
3107 |
// Only examine CopyToReg uses that copy to EFLAGS. |
| 3108 |
if (cast(UI->getOperand(1))->getReg() != X86::EFLAGS) |
3108 |
if (cast(UI->getOperand(1))->getReg() != X86::EFLAGS) |
| 3109 |
return false; |
3109 |
return false; |
| 3110 |
// Examine each user of the CopyToReg use. |
3110 |
// Examine each user of the CopyToReg use. |
| 3111 |
for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end(); |
3111 |
for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end(); |
| 3112 |
FlagUI != FlagUE; ++FlagUI) { |
3112 |
FlagUI != FlagUE; ++FlagUI) { |
| 3113 |
// Only examine the Flag result. |
3113 |
// Only examine the Flag result. |
| 3114 |
if (FlagUI.getUse().getResNo() != 1) |
3114 |
if (FlagUI.getUse().getResNo() != 1) |
| 3115 |
continue; |
3115 |
continue; |
| 3116 |
// Anything unusual: assume conservatively. |
3116 |
// Anything unusual: assume conservatively. |
| 3117 |
if (!FlagUI->isMachineOpcode()) |
3117 |
if (!FlagUI->isMachineOpcode()) |
| 3118 |
return false; |
3118 |
return false; |
| 3119 |
// Examine the condition code of the user. |
3119 |
// Examine the condition code of the user. |
| 3120 |
X86::CondCode CC = getCondFromNode(*FlagUI); |
3120 |
X86::CondCode CC = getCondFromNode(*FlagUI); |
| 3121 |
|
3121 |
|
| 3122 |
if (mayUseCarryFlag(CC)) |
3122 |
if (mayUseCarryFlag(CC)) |
| 3123 |
return false; |
3123 |
return false; |
| 3124 |
} |
3124 |
} |
| 3125 |
|
3125 |
|
| 3126 |
// This CopyToReg is ok. Move on to the next user. |
3126 |
// This CopyToReg is ok. Move on to the next user. |
| 3127 |
continue; |
3127 |
continue; |
| 3128 |
} |
3128 |
} |
| 3129 |
|
3129 |
|
| 3130 |
// This might be an unselected node. So look for the pre-isel opcodes that |
3130 |
// This might be an unselected node. So look for the pre-isel opcodes that |
| 3131 |
// use flags. |
3131 |
// use flags. |
| 3132 |
unsigned CCOpNo; |
3132 |
unsigned CCOpNo; |
| 3133 |
switch (UIOpc) { |
3133 |
switch (UIOpc) { |
| 3134 |
default: |
3134 |
default: |
| 3135 |
// Something unusual. Be conservative. |
3135 |
// Something unusual. Be conservative. |
| 3136 |
return false; |
3136 |
return false; |
| 3137 |
case X86ISD::SETCC: CCOpNo = 0; break; |
3137 |
case X86ISD::SETCC: CCOpNo = 0; break; |
| 3138 |
case X86ISD::SETCC_CARRY: CCOpNo = 0; break; |
3138 |
case X86ISD::SETCC_CARRY: CCOpNo = 0; break; |
| 3139 |
case X86ISD::CMOV: CCOpNo = 2; break; |
3139 |
case X86ISD::CMOV: CCOpNo = 2; break; |
| 3140 |
case X86ISD::BRCOND: CCOpNo = 2; break; |
3140 |
case X86ISD::BRCOND: CCOpNo = 2; break; |
| 3141 |
} |
3141 |
} |
| 3142 |
|
3142 |
|
| 3143 |
X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo); |
3143 |
X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo); |
| 3144 |
if (mayUseCarryFlag(CC)) |
3144 |
if (mayUseCarryFlag(CC)) |
| 3145 |
return false; |
3145 |
return false; |
| 3146 |
} |
3146 |
} |
| 3147 |
return true; |
3147 |
return true; |
| 3148 |
} |
3148 |
} |
| 3149 |
|
3149 |
|
| 3150 |
/// Check whether or not the chain ending in StoreNode is suitable for doing |
3150 |
/// Check whether or not the chain ending in StoreNode is suitable for doing |
| 3151 |
/// the {load; op; store} to modify transformation. |
3151 |
/// the {load; op; store} to modify transformation. |
| 3152 |
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, |
3152 |
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, |
| 3153 |
SDValue StoredVal, SelectionDAG *CurDAG, |
3153 |
SDValue StoredVal, SelectionDAG *CurDAG, |
| 3154 |
unsigned LoadOpNo, |
3154 |
unsigned LoadOpNo, |
| 3155 |
LoadSDNode *&LoadNode, |
3155 |
LoadSDNode *&LoadNode, |
| 3156 |
SDValue &InputChain) { |
3156 |
SDValue &InputChain) { |
| 3157 |
// Is the stored value result 0 of the operation? |
3157 |
// Is the stored value result 0 of the operation? |
| 3158 |
if (StoredVal.getResNo() != 0) return false; |
3158 |
if (StoredVal.getResNo() != 0) return false; |
| 3159 |
|
3159 |
|
| 3160 |
// Are there other uses of the operation other than the store? |
3160 |
// Are there other uses of the operation other than the store? |
| 3161 |
if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; |
3161 |
if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; |
| 3162 |
|
3162 |
|
| 3163 |
// Is the store non-extending and non-indexed? |
3163 |
// Is the store non-extending and non-indexed? |
| 3164 |
if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) |
3164 |
if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) |
| 3165 |
return false; |
3165 |
return false; |
| 3166 |
|
3166 |
|
| 3167 |
SDValue Load = StoredVal->getOperand(LoadOpNo); |
3167 |
SDValue Load = StoredVal->getOperand(LoadOpNo); |
| 3168 |
// Is the stored value a non-extending and non-indexed load? |
3168 |
// Is the stored value a non-extending and non-indexed load? |
| 3169 |
if (!ISD::isNormalLoad(Load.getNode())) return false; |
3169 |
if (!ISD::isNormalLoad(Load.getNode())) return false; |
| 3170 |
|
3170 |
|
| 3171 |
// Return LoadNode by reference. |
3171 |
// Return LoadNode by reference. |
| 3172 |
LoadNode = cast(Load); |
3172 |
LoadNode = cast(Load); |
| 3173 |
|
3173 |
|
| 3174 |
// Is store the only read of the loaded value? |
3174 |
// Is store the only read of the loaded value? |
| 3175 |
if (!Load.hasOneUse()) |
3175 |
if (!Load.hasOneUse()) |
| 3176 |
return false; |
3176 |
return false; |
| 3177 |
|
3177 |
|
| 3178 |
// Is the address of the store the same as the load? |
3178 |
// Is the address of the store the same as the load? |
| 3179 |
if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || |
3179 |
if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || |
| 3180 |
LoadNode->getOffset() != StoreNode->getOffset()) |
3180 |
LoadNode->getOffset() != StoreNode->getOffset()) |
| 3181 |
return false; |
3181 |
return false; |
| 3182 |
|
3182 |
|
| 3183 |
bool FoundLoad = false; |
3183 |
bool FoundLoad = false; |
| 3184 |
SmallVector ChainOps; |
3184 |
SmallVector ChainOps; |
| 3185 |
SmallVector LoopWorklist; |
3185 |
SmallVector LoopWorklist; |
| 3186 |
SmallPtrSet Visited; |
3186 |
SmallPtrSet Visited; |
| 3187 |
const unsigned int Max = 1024; |
3187 |
const unsigned int Max = 1024; |
| 3188 |
|
3188 |
|
| 3189 |
// Visualization of Load-Op-Store fusion: |
3189 |
// Visualization of Load-Op-Store fusion: |
| 3190 |
// ------------------------- |
3190 |
// ------------------------- |
| 3191 |
// Legend: |
3191 |
// Legend: |
| 3192 |
// *-lines = Chain operand dependencies. |
3192 |
// *-lines = Chain operand dependencies. |
| 3193 |
// |-lines = Normal operand dependencies. |
3193 |
// |-lines = Normal operand dependencies. |
| 3194 |
// Dependencies flow down and right. n-suffix references multiple nodes. |
3194 |
// Dependencies flow down and right. n-suffix references multiple nodes. |
| 3195 |
// |
3195 |
// |
| 3196 |
// C Xn C |
3196 |
// C Xn C |
| 3197 |
// * * * |
3197 |
// * * * |
| 3198 |
// * * * |
3198 |
// * * * |
| 3199 |
// Xn A-LD Yn TF Yn |
3199 |
// Xn A-LD Yn TF Yn |
| 3200 |
// * * \ | * | |
3200 |
// * * \ | * | |
| 3201 |
// * * \ | * | |
3201 |
// * * \ | * | |
| 3202 |
// * * \ | => A--LD_OP_ST |
3202 |
// * * \ | => A--LD_OP_ST |
| 3203 |
// * * \| \ |
3203 |
// * * \| \ |
| 3204 |
// TF OP \ |
3204 |
// TF OP \ |
| 3205 |
// * | \ Zn |
3205 |
// * | \ Zn |
| 3206 |
// * | \ |
3206 |
// * | \ |
| 3207 |
// A-ST Zn |
3207 |
// A-ST Zn |
| 3208 |
// |
3208 |
// |
| 3209 |
|
3209 |
|
| 3210 |
// This merge induced dependences from: #1: Xn -> LD, OP, Zn |
3210 |
// This merge induced dependences from: #1: Xn -> LD, OP, Zn |
| 3211 |
// #2: Yn -> LD |
3211 |
// #2: Yn -> LD |
| 3212 |
// #3: ST -> Zn |
3212 |
// #3: ST -> Zn |
| 3213 |
|
3213 |
|
| 3214 |
// Ensure the transform is safe by checking for the dual |
3214 |
// Ensure the transform is safe by checking for the dual |
| 3215 |
// dependencies to make sure we do not induce a loop. |
3215 |
// dependencies to make sure we do not induce a loop. |
| 3216 |
|
3216 |
|
| 3217 |
// As LD is a predecessor to both OP and ST we can do this by checking: |
3217 |
// As LD is a predecessor to both OP and ST we can do this by checking: |
| 3218 |
// a). if LD is a predecessor to a member of Xn or Yn. |
3218 |
// a). if LD is a predecessor to a member of Xn or Yn. |
| 3219 |
// b). if a Zn is a predecessor to ST. |
3219 |
// b). if a Zn is a predecessor to ST. |
| 3220 |
|
3220 |
|
| 3221 |
// However, (b) can only occur through being a chain predecessor to |
3221 |
// However, (b) can only occur through being a chain predecessor to |
| 3222 |
// ST, which is the same as Zn being a member or predecessor of Xn, |
3222 |
// ST, which is the same as Zn being a member or predecessor of Xn, |
| 3223 |
// which is a subset of LD being a predecessor of Xn. So it's |
3223 |
// which is a subset of LD being a predecessor of Xn. So it's |
| 3224 |
// subsumed by check (a). |
3224 |
// subsumed by check (a). |
| 3225 |
|
3225 |
|
| 3226 |
SDValue Chain = StoreNode->getChain(); |
3226 |
SDValue Chain = StoreNode->getChain(); |
| 3227 |
|
3227 |
|
| 3228 |
// Gather X elements in ChainOps. |
3228 |
// Gather X elements in ChainOps. |
| 3229 |
if (Chain == Load.getValue(1)) { |
3229 |
if (Chain == Load.getValue(1)) { |
| 3230 |
FoundLoad = true; |
3230 |
FoundLoad = true; |
| 3231 |
ChainOps.push_back(Load.getOperand(0)); |
3231 |
ChainOps.push_back(Load.getOperand(0)); |
| 3232 |
} else if (Chain.getOpcode() == ISD::TokenFactor) { |
3232 |
} else if (Chain.getOpcode() == ISD::TokenFactor) { |
| 3233 |
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { |
3233 |
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { |
| 3234 |
SDValue Op = Chain.getOperand(i); |
3234 |
SDValue Op = Chain.getOperand(i); |
| 3235 |
if (Op == Load.getValue(1)) { |
3235 |
if (Op == Load.getValue(1)) { |
| 3236 |
FoundLoad = true; |
3236 |
FoundLoad = true; |
| 3237 |
// Drop Load, but keep its chain. No cycle check necessary. |
3237 |
// Drop Load, but keep its chain. No cycle check necessary. |
| 3238 |
ChainOps.push_back(Load.getOperand(0)); |
3238 |
ChainOps.push_back(Load.getOperand(0)); |
| 3239 |
continue; |
3239 |
continue; |
| 3240 |
} |
3240 |
} |
| 3241 |
LoopWorklist.push_back(Op.getNode()); |
3241 |
LoopWorklist.push_back(Op.getNode()); |
| 3242 |
ChainOps.push_back(Op); |
3242 |
ChainOps.push_back(Op); |
| 3243 |
} |
3243 |
} |
| 3244 |
} |
3244 |
} |
| 3245 |
|
3245 |
|
| 3246 |
if (!FoundLoad) |
3246 |
if (!FoundLoad) |
| 3247 |
return false; |
3247 |
return false; |
| 3248 |
|
3248 |
|
| 3249 |
// Worklist is currently Xn. Add Yn to worklist. |
3249 |
// Worklist is currently Xn. Add Yn to worklist. |
| 3250 |
for (SDValue Op : StoredVal->ops()) |
3250 |
for (SDValue Op : StoredVal->ops()) |
| 3251 |
if (Op.getNode() != LoadNode) |
3251 |
if (Op.getNode() != LoadNode) |
| 3252 |
LoopWorklist.push_back(Op.getNode()); |
3252 |
LoopWorklist.push_back(Op.getNode()); |
| 3253 |
|
3253 |
|
| 3254 |
// Check (a) if Load is a predecessor to Xn + Yn |
3254 |
// Check (a) if Load is a predecessor to Xn + Yn |
| 3255 |
if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max, |
3255 |
if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max, |
| 3256 |
true)) |
3256 |
true)) |
| 3257 |
return false; |
3257 |
return false; |
| 3258 |
|
3258 |
|
| 3259 |
InputChain = |
3259 |
InputChain = |
| 3260 |
CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps); |
3260 |
CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps); |
| 3261 |
return true; |
3261 |
return true; |
| 3262 |
} |
3262 |
} |
| 3263 |
|
3263 |
|
| 3264 |
// Change a chain of {load; op; store} of the same value into a simple op |
3264 |
// Change a chain of {load; op; store} of the same value into a simple op |
| 3265 |
// through memory of that value, if the uses of the modified value and its |
3265 |
// through memory of that value, if the uses of the modified value and its |
| 3266 |
// address are suitable. |
3266 |
// address are suitable. |
| 3267 |
// |
3267 |
// |
| 3268 |
// The tablegen pattern memory operand pattern is currently not able to match |
3268 |
// The tablegen pattern memory operand pattern is currently not able to match |
| 3269 |
// the case where the EFLAGS on the original operation are used. |
3269 |
// the case where the EFLAGS on the original operation are used. |
| 3270 |
// |
3270 |
// |
| 3271 |
// To move this to tablegen, we'll need to improve tablegen to allow flags to |
3271 |
// To move this to tablegen, we'll need to improve tablegen to allow flags to |
| 3272 |
// be transferred from a node in the pattern to the result node, probably with |
3272 |
// be transferred from a node in the pattern to the result node, probably with |
| 3273 |
// a new keyword. For example, we have this |
3273 |
// a new keyword. For example, we have this |
| 3274 |
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", |
3274 |
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", |
| 3275 |
// [(store (add (loadi64 addr:$dst), -1), addr:$dst), |
3275 |
// [(store (add (loadi64 addr:$dst), -1), addr:$dst), |
| 3276 |
// (implicit EFLAGS)]>; |
3276 |
// (implicit EFLAGS)]>; |
| 3277 |
// but maybe need something like this |
3277 |
// but maybe need something like this |
| 3278 |
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", |
3278 |
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", |
| 3279 |
// [(store (add (loadi64 addr:$dst), -1), addr:$dst), |
3279 |
// [(store (add (loadi64 addr:$dst), -1), addr:$dst), |
| 3280 |
// (transferrable EFLAGS)]>; |
3280 |
// (transferrable EFLAGS)]>; |
| 3281 |
// |
3281 |
// |
| 3282 |
// Until then, we manually fold these and instruction select the operation |
3282 |
// Until then, we manually fold these and instruction select the operation |
| 3283 |
// here. |
3283 |
// here. |
| 3284 |
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { |
3284 |
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { |
| 3285 |
auto *StoreNode = cast(Node); |
3285 |
auto *StoreNode = cast(Node); |
| 3286 |
SDValue StoredVal = StoreNode->getOperand(1); |
3286 |
SDValue StoredVal = StoreNode->getOperand(1); |
| 3287 |
unsigned Opc = StoredVal->getOpcode(); |
3287 |
unsigned Opc = StoredVal->getOpcode(); |
| 3288 |
|
3288 |
|
| 3289 |
// Before we try to select anything, make sure this is memory operand size |
3289 |
// Before we try to select anything, make sure this is memory operand size |
| 3290 |
// and opcode we can handle. Note that this must match the code below that |
3290 |
// and opcode we can handle. Note that this must match the code below that |
| 3291 |
// actually lowers the opcodes. |
3291 |
// actually lowers the opcodes. |
| 3292 |
EVT MemVT = StoreNode->getMemoryVT(); |
3292 |
EVT MemVT = StoreNode->getMemoryVT(); |
| 3293 |
if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 && |
3293 |
if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 && |
| 3294 |
MemVT != MVT::i8) |
3294 |
MemVT != MVT::i8) |
| 3295 |
return false; |
3295 |
return false; |
| 3296 |
|
3296 |
|
| 3297 |
bool IsCommutable = false; |
3297 |
bool IsCommutable = false; |
| 3298 |
bool IsNegate = false; |
3298 |
bool IsNegate = false; |
| 3299 |
switch (Opc) { |
3299 |
switch (Opc) { |
| 3300 |
default: |
3300 |
default: |
| 3301 |
return false; |
3301 |
return false; |
| 3302 |
case X86ISD::SUB: |
3302 |
case X86ISD::SUB: |
| 3303 |
IsNegate = isNullConstant(StoredVal.getOperand(0)); |
3303 |
IsNegate = isNullConstant(StoredVal.getOperand(0)); |
| 3304 |
break; |
3304 |
break; |
| 3305 |
case X86ISD::SBB: |
3305 |
case X86ISD::SBB: |
| 3306 |
break; |
3306 |
break; |
| 3307 |
case X86ISD::ADD: |
3307 |
case X86ISD::ADD: |
| 3308 |
case X86ISD::ADC: |
3308 |
case X86ISD::ADC: |
| 3309 |
case X86ISD::AND: |
3309 |
case X86ISD::AND: |
| 3310 |
case X86ISD::OR: |
3310 |
case X86ISD::OR: |
| 3311 |
case X86ISD::XOR: |
3311 |
case X86ISD::XOR: |
| 3312 |
IsCommutable = true; |
3312 |
IsCommutable = true; |
| 3313 |
break; |
3313 |
break; |
| 3314 |
} |
3314 |
} |
| 3315 |
|
3315 |
|
| 3316 |
unsigned LoadOpNo = IsNegate ? 1 : 0; |
3316 |
unsigned LoadOpNo = IsNegate ? 1 : 0; |
| 3317 |
LoadSDNode *LoadNode = nullptr; |
3317 |
LoadSDNode *LoadNode = nullptr; |
| 3318 |
SDValue InputChain; |
3318 |
SDValue InputChain; |
| 3319 |
if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, |
3319 |
if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, |
| 3320 |
LoadNode, InputChain)) { |
3320 |
LoadNode, InputChain)) { |
| 3321 |
if (!IsCommutable) |
3321 |
if (!IsCommutable) |
| 3322 |
return false; |
3322 |
return false; |
| 3323 |
|
3323 |
|
| 3324 |
// This operation is commutable, try the other operand. |
3324 |
// This operation is commutable, try the other operand. |
| 3325 |
LoadOpNo = 1; |
3325 |
LoadOpNo = 1; |
| 3326 |
if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, |
3326 |
if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, |
| 3327 |
LoadNode, InputChain)) |
3327 |
LoadNode, InputChain)) |
| 3328 |
return false; |
3328 |
return false; |
| 3329 |
} |
3329 |
} |
| 3330 |
|
3330 |
|
| 3331 |
SDValue Base, Scale, Index, Disp, Segment; |
3331 |
SDValue Base, Scale, Index, Disp, Segment; |
| 3332 |
if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, |
3332 |
if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, |
| 3333 |
Segment)) |
3333 |
Segment)) |
| 3334 |
return false; |
3334 |
return false; |
| 3335 |
|
3335 |
|
| 3336 |
auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16, |
3336 |
auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16, |
| 3337 |
unsigned Opc8) { |
3337 |
unsigned Opc8) { |
| 3338 |
switch (MemVT.getSimpleVT().SimpleTy) { |
3338 |
switch (MemVT.getSimpleVT().SimpleTy) { |
| 3339 |
case MVT::i64: |
3339 |
case MVT::i64: |
| 3340 |
return Opc64; |
3340 |
return Opc64; |
| 3341 |
case MVT::i32: |
3341 |
case MVT::i32: |
| 3342 |
return Opc32; |
3342 |
return Opc32; |
| 3343 |
case MVT::i16: |
3343 |
case MVT::i16: |
| 3344 |
return Opc16; |
3344 |
return Opc16; |
| 3345 |
case MVT::i8: |
3345 |
case MVT::i8: |
| 3346 |
return Opc8; |
3346 |
return Opc8; |
| 3347 |
default: |
3347 |
default: |
| 3348 |
llvm_unreachable("Invalid size!"); |
3348 |
llvm_unreachable("Invalid size!"); |
| 3349 |
} |
3349 |
} |
| 3350 |
}; |
3350 |
}; |
| 3351 |
|
3351 |
|
| 3352 |
MachineSDNode *Result; |
3352 |
MachineSDNode *Result; |
| 3353 |
switch (Opc) { |
3353 |
switch (Opc) { |
| 3354 |
case X86ISD::SUB: |
3354 |
case X86ISD::SUB: |
| 3355 |
// Handle negate. |
3355 |
// Handle negate. |
| 3356 |
if (IsNegate) { |
3356 |
if (IsNegate) { |
| 3357 |
unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m, |
3357 |
unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m, |
| 3358 |
X86::NEG8m); |
3358 |
X86::NEG8m); |
| 3359 |
const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; |
3359 |
const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; |
| 3360 |
Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, |
3360 |
Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, |
| 3361 |
MVT::Other, Ops); |
3361 |
MVT::Other, Ops); |
| 3362 |
break; |
3362 |
break; |
| 3363 |
} |
3363 |
} |
| 3364 |
[[fallthrough]]; |
3364 |
[[fallthrough]]; |
| 3365 |
case X86ISD::ADD: |
3365 |
case X86ISD::ADD: |
| 3366 |
// Try to match inc/dec. |
3366 |
// Try to match inc/dec. |
| 3367 |
if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) { |
3367 |
if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) { |
| 3368 |
bool IsOne = isOneConstant(StoredVal.getOperand(1)); |
3368 |
bool IsOne = isOneConstant(StoredVal.getOperand(1)); |
| 3369 |
bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1)); |
3369 |
bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1)); |
| 3370 |
// ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec. |
3370 |
// ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec. |
| 3371 |
if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) { |
3371 |
if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) { |
| 3372 |
unsigned NewOpc = |
3372 |
unsigned NewOpc = |
| 3373 |
((Opc == X86ISD::ADD) == IsOne) |
3373 |
((Opc == X86ISD::ADD) == IsOne) |
| 3374 |
? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) |
3374 |
? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) |
| 3375 |
: SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); |
3375 |
: SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); |
| 3376 |
const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; |
3376 |
const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; |
| 3377 |
Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, |
3377 |
Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, |
| 3378 |
MVT::Other, Ops); |
3378 |
MVT::Other, Ops); |
| 3379 |
break; |
3379 |
break; |
| 3380 |
} |
3380 |
} |
| 3381 |
} |
3381 |
} |
| 3382 |
[[fallthrough]]; |
3382 |
[[fallthrough]]; |
| 3383 |
case X86ISD::ADC: |
3383 |
case X86ISD::ADC: |
| 3384 |
case X86ISD::SBB: |
3384 |
case X86ISD::SBB: |
| 3385 |
case X86ISD::AND: |
3385 |
case X86ISD::AND: |
| 3386 |
case X86ISD::OR: |
3386 |
case X86ISD::OR: |
| 3387 |
case X86ISD::XOR: { |
3387 |
case X86ISD::XOR: { |
| 3388 |
auto SelectRegOpcode = [SelectOpcode](unsigned Opc) { |
3388 |
auto SelectRegOpcode = [SelectOpcode](unsigned Opc) { |
| 3389 |
switch (Opc) { |
3389 |
switch (Opc) { |
| 3390 |
case X86ISD::ADD: |
3390 |
case X86ISD::ADD: |
| 3391 |
return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr, |
3391 |
return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr, |
| 3392 |
X86::ADD8mr); |
3392 |
X86::ADD8mr); |
| 3393 |
case X86ISD::ADC: |
3393 |
case X86ISD::ADC: |
| 3394 |
return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr, |
3394 |
return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr, |
| 3395 |
X86::ADC8mr); |
3395 |
X86::ADC8mr); |
| 3396 |
case X86ISD::SUB: |
3396 |
case X86ISD::SUB: |
| 3397 |
return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr, |
3397 |
return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr, |
| 3398 |
X86::SUB8mr); |
3398 |
X86::SUB8mr); |
| 3399 |
case X86ISD::SBB: |
3399 |
case X86ISD::SBB: |
| 3400 |
return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr, |
3400 |
return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr, |
| 3401 |
X86::SBB8mr); |
3401 |
X86::SBB8mr); |
| 3402 |
case X86ISD::AND: |
3402 |
case X86ISD::AND: |
| 3403 |
return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr, |
3403 |
return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr, |
| 3404 |
X86::AND8mr); |
3404 |
X86::AND8mr); |
| 3405 |
case X86ISD::OR: |
3405 |
case X86ISD::OR: |
| 3406 |
return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr); |
3406 |
return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr); |
| 3407 |
case X86ISD::XOR: |
3407 |
case X86ISD::XOR: |
| 3408 |
return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr, |
3408 |
return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr, |
| 3409 |
X86::XOR8mr); |
3409 |
X86::XOR8mr); |
| 3410 |
default: |
3410 |
default: |
| 3411 |
llvm_unreachable("Invalid opcode!"); |
3411 |
llvm_unreachable("Invalid opcode!"); |
| 3412 |
} |
3412 |
} |
| 3413 |
}; |
3413 |
}; |
| 3414 |
auto SelectImmOpcode = [SelectOpcode](unsigned Opc) { |
3414 |
auto SelectImmOpcode = [SelectOpcode](unsigned Opc) { |
| 3415 |
switch (Opc) { |
3415 |
switch (Opc) { |
| 3416 |
case X86ISD::ADD: |
3416 |
case X86ISD::ADD: |
| 3417 |
return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi, |
3417 |
return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi, |
| 3418 |
X86::ADD8mi); |
3418 |
X86::ADD8mi); |
| 3419 |
case X86ISD::ADC: |
3419 |
case X86ISD::ADC: |
| 3420 |
return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi, |
3420 |
return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi, |
| 3421 |
X86::ADC8mi); |
3421 |
X86::ADC8mi); |
| 3422 |
case X86ISD::SUB: |
3422 |
case X86ISD::SUB: |
| 3423 |
return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi, |
3423 |
return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi, |
| 3424 |
X86::SUB8mi); |
3424 |
X86::SUB8mi); |
| 3425 |
case X86ISD::SBB: |
3425 |
case X86ISD::SBB: |
| 3426 |
return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi, |
3426 |
return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi, |
| 3427 |
X86::SBB8mi); |
3427 |
X86::SBB8mi); |
| 3428 |
case X86ISD::AND: |
3428 |
case X86ISD::AND: |
| 3429 |
return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi, |
3429 |
return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi, |
| 3430 |
X86::AND8mi); |
3430 |
X86::AND8mi); |
| 3431 |
case X86ISD::OR: |
3431 |
case X86ISD::OR: |
| 3432 |
return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi, |
3432 |
return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi, |
| 3433 |
X86::OR8mi); |
3433 |
X86::OR8mi); |
| 3434 |
case X86ISD::XOR: |
3434 |
case X86ISD::XOR: |
| 3435 |
return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi, |
3435 |
return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi, |
| 3436 |
X86::XOR8mi); |
3436 |
X86::XOR8mi); |
| 3437 |
default: |
3437 |
default: |
| 3438 |
llvm_unreachable("Invalid opcode!"); |
3438 |
llvm_unreachable("Invalid opcode!"); |
| 3439 |
} |
3439 |
} |
| 3440 |
}; |
3440 |
}; |
| 3441 |
|
3441 |
|
| 3442 |
unsigned NewOpc = SelectRegOpcode(Opc); |
3442 |
unsigned NewOpc = SelectRegOpcode(Opc); |
| 3443 |
SDValue Operand = StoredVal->getOperand(1-LoadOpNo); |
3443 |
SDValue Operand = StoredVal->getOperand(1-LoadOpNo); |
| 3444 |
|
3444 |
|
| 3445 |
// See if the operand is a constant that we can fold into an immediate |
3445 |
// See if the operand is a constant that we can fold into an immediate |
| 3446 |
// operand. |
3446 |
// operand. |
| 3447 |
if (auto *OperandC = dyn_cast(Operand)) { |
3447 |
if (auto *OperandC = dyn_cast(Operand)) { |
| 3448 |
int64_t OperandV = OperandC->getSExtValue(); |
3448 |
int64_t OperandV = OperandC->getSExtValue(); |
| 3449 |
|
3449 |
|
| 3450 |
// Check if we can shrink the operand enough to fit in an immediate (or |
3450 |
// Check if we can shrink the operand enough to fit in an immediate (or |
| 3451 |
// fit into a smaller immediate) by negating it and switching the |
3451 |
// fit into a smaller immediate) by negating it and switching the |
| 3452 |
// operation. |
3452 |
// operation. |
| 3453 |
if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) && |
3453 |
if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) && |
| 3454 |
((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) || |
3454 |
((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) || |
| 3455 |
(MemVT == MVT::i64 && !isInt<32>(OperandV) && |
3455 |
(MemVT == MVT::i64 && !isInt<32>(OperandV) && |
| 3456 |
isInt<32>(-OperandV))) && |
3456 |
isInt<32>(-OperandV))) && |
| 3457 |
hasNoCarryFlagUses(StoredVal.getValue(1))) { |
3457 |
hasNoCarryFlagUses(StoredVal.getValue(1))) { |
| 3458 |
OperandV = -OperandV; |
3458 |
OperandV = -OperandV; |
| 3459 |
Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD; |
3459 |
Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD; |
| 3460 |
} |
3460 |
} |
| 3461 |
|
3461 |
|
| 3462 |
if (MemVT != MVT::i64 || isInt<32>(OperandV)) { |
3462 |
if (MemVT != MVT::i64 || isInt<32>(OperandV)) { |
| 3463 |
Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); |
3463 |
Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); |
| 3464 |
NewOpc = SelectImmOpcode(Opc); |
3464 |
NewOpc = SelectImmOpcode(Opc); |
| 3465 |
} |
3465 |
} |
| 3466 |
} |
3466 |
} |
| 3467 |
|
3467 |
|
| 3468 |
if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) { |
3468 |
if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) { |
| 3469 |
SDValue CopyTo = |
3469 |
SDValue CopyTo = |
| 3470 |
CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS, |
3470 |
CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS, |
| 3471 |
StoredVal.getOperand(2), SDValue()); |
3471 |
StoredVal.getOperand(2), SDValue()); |
| 3472 |
|
3472 |
|
| 3473 |
const SDValue Ops[] = {Base, Scale, Index, Disp, |
3473 |
const SDValue Ops[] = {Base, Scale, Index, Disp, |
| 3474 |
Segment, Operand, CopyTo, CopyTo.getValue(1)}; |
3474 |
Segment, Operand, CopyTo, CopyTo.getValue(1)}; |
| 3475 |
Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, |
3475 |
Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, |
| 3476 |
Ops); |
3476 |
Ops); |
| 3477 |
} else { |
3477 |
} else { |
| 3478 |
const SDValue Ops[] = {Base, Scale, Index, Disp, |
3478 |
const SDValue Ops[] = {Base, Scale, Index, Disp, |
| 3479 |
Segment, Operand, InputChain}; |
3479 |
Segment, Operand, InputChain}; |
| 3480 |
Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, |
3480 |
Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, |
| 3481 |
Ops); |
3481 |
Ops); |
| 3482 |
} |
3482 |
} |
| 3483 |
break; |
3483 |
break; |
| 3484 |
} |
3484 |
} |
| 3485 |
default: |
3485 |
default: |
| 3486 |
llvm_unreachable("Invalid opcode!"); |
3486 |
llvm_unreachable("Invalid opcode!"); |
| 3487 |
} |
3487 |
} |
| 3488 |
|
3488 |
|
| 3489 |
MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(), |
3489 |
MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(), |
| 3490 |
LoadNode->getMemOperand()}; |
3490 |
LoadNode->getMemOperand()}; |
| 3491 |
CurDAG->setNodeMemRefs(Result, MemOps); |
3491 |
CurDAG->setNodeMemRefs(Result, MemOps); |
| 3492 |
|
3492 |
|
| 3493 |
// Update Load Chain uses as well. |
3493 |
// Update Load Chain uses as well. |
| 3494 |
ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1)); |
3494 |
ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1)); |
| 3495 |
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); |
3495 |
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); |
| 3496 |
ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); |
3496 |
ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); |
| 3497 |
CurDAG->RemoveDeadNode(Node); |
3497 |
CurDAG->RemoveDeadNode(Node); |
| 3498 |
return true; |
3498 |
return true; |
| 3499 |
} |
3499 |
} |
| 3500 |
|
3500 |
|
| 3501 |
// See if this is an X & Mask that we can match to BEXTR/BZHI. |
3501 |
// See if this is an X & Mask that we can match to BEXTR/BZHI. |
| 3502 |
// Where Mask is one of the following patterns: |
3502 |
// Where Mask is one of the following patterns: |
| 3503 |
// a) x & (1 << nbits) - 1 |
3503 |
// a) x & (1 << nbits) - 1 |
| 3504 |
// b) x & ~(-1 << nbits) |
3504 |
// b) x & ~(-1 << nbits) |
| 3505 |
// c) x & (-1 >> (32 - y)) |
3505 |
// c) x & (-1 >> (32 - y)) |
| 3506 |
// d) x << (32 - y) >> (32 - y) |
3506 |
// d) x << (32 - y) >> (32 - y) |
| 3507 |
// e) (1 << nbits) - 1 |
3507 |
// e) (1 << nbits) - 1 |
| 3508 |
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { |
3508 |
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { |
| 3509 |
assert( |
3509 |
assert( |
| 3510 |
(Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND || |
3510 |
(Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND || |
| 3511 |
Node->getOpcode() == ISD::SRL) && |
3511 |
Node->getOpcode() == ISD::SRL) && |
| 3512 |
"Should be either an and-mask, or right-shift after clearing high bits."); |
3512 |
"Should be either an and-mask, or right-shift after clearing high bits."); |
| 3513 |
|
3513 |
|
| 3514 |
// BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one. |
3514 |
// BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one. |
| 3515 |
if (!Subtarget->hasBMI() && !Subtarget->hasBMI2()) |
3515 |
if (!Subtarget->hasBMI() && !Subtarget->hasBMI2()) |
| 3516 |
return false; |
3516 |
return false; |
| 3517 |
|
3517 |
|
| 3518 |
MVT NVT = Node->getSimpleValueType(0); |
3518 |
MVT NVT = Node->getSimpleValueType(0); |
| 3519 |
|
3519 |
|
| 3520 |
// Only supported for 32 and 64 bits. |
3520 |
// Only supported for 32 and 64 bits. |
| 3521 |
if (NVT != MVT::i32 && NVT != MVT::i64) |
3521 |
if (NVT != MVT::i32 && NVT != MVT::i64) |
| 3522 |
return false; |
3522 |
return false; |
| 3523 |
|
3523 |
|
| 3524 |
SDValue NBits; |
3524 |
SDValue NBits; |
| 3525 |
bool NegateNBits; |
3525 |
bool NegateNBits; |
| 3526 |
|
3526 |
|
| 3527 |
// If we have BMI2's BZHI, we are ok with multi-use patterns. |
3527 |
// If we have BMI2's BZHI, we are ok with multi-use patterns. |
| 3528 |
// Else, if we only have BMI1's BEXTR, we require one-use. |
3528 |
// Else, if we only have BMI1's BEXTR, we require one-use. |
| 3529 |
const bool AllowExtraUsesByDefault = Subtarget->hasBMI2(); |
3529 |
const bool AllowExtraUsesByDefault = Subtarget->hasBMI2(); |
| 3530 |
auto checkUses = [AllowExtraUsesByDefault]( |
3530 |
auto checkUses = [AllowExtraUsesByDefault]( |
| 3531 |
SDValue Op, unsigned NUses, |
3531 |
SDValue Op, unsigned NUses, |
| 3532 |
std::optional<bool> AllowExtraUses) { |
3532 |
std::optional<bool> AllowExtraUses) { |
| 3533 |
return AllowExtraUses.value_or(AllowExtraUsesByDefault) || |
3533 |
return AllowExtraUses.value_or(AllowExtraUsesByDefault) || |
| 3534 |
Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo()); |
3534 |
Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo()); |
| 3535 |
}; |
3535 |
}; |
| 3536 |
auto checkOneUse = [checkUses](SDValue Op, |
3536 |
auto checkOneUse = [checkUses](SDValue Op, |
| 3537 |
std::optional<bool> AllowExtraUses = |
3537 |
std::optional<bool> AllowExtraUses = |
| 3538 |
std::nullopt) { |
3538 |
std::nullopt) { |
| 3539 |
return checkUses(Op, 1, AllowExtraUses); |
3539 |
return checkUses(Op, 1, AllowExtraUses); |
| 3540 |
}; |
3540 |
}; |
| 3541 |
auto checkTwoUse = [checkUses](SDValue Op, |
3541 |
auto checkTwoUse = [checkUses](SDValue Op, |
| 3542 |
std::optional<bool> AllowExtraUses = |
3542 |
std::optional<bool> AllowExtraUses = |
| 3543 |
std::nullopt) { |
3543 |
std::nullopt) { |
| 3544 |
return checkUses(Op, 2, AllowExtraUses); |
3544 |
return checkUses(Op, 2, AllowExtraUses); |
| 3545 |
}; |
3545 |
}; |
| 3546 |
|
3546 |
|
| 3547 |
auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) { |
3547 |
auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) { |
| 3548 |
if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) { |
3548 |
if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) { |
| 3549 |
assert(V.getSimpleValueType() == MVT::i32 && |
3549 |
assert(V.getSimpleValueType() == MVT::i32 && |
| 3550 |
V.getOperand(0).getSimpleValueType() == MVT::i64 && |
3550 |
V.getOperand(0).getSimpleValueType() == MVT::i64 && |
| 3551 |
"Expected i64 -> i32 truncation"); |
3551 |
"Expected i64 -> i32 truncation"); |
| 3552 |
V = V.getOperand(0); |
3552 |
V = V.getOperand(0); |
| 3553 |
} |
3553 |
} |
| 3554 |
return V; |
3554 |
return V; |
| 3555 |
}; |
3555 |
}; |
| 3556 |
|
3556 |
|
| 3557 |
// a) x & ((1 << nbits) + (-1)) |
3557 |
// a) x & ((1 << nbits) + (-1)) |
| 3558 |
auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits, |
3558 |
auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits, |
| 3559 |
&NegateNBits](SDValue Mask) -> bool { |
3559 |
&NegateNBits](SDValue Mask) -> bool { |
| 3560 |
// Match `add`. Must only have one use! |
3560 |
// Match `add`. Must only have one use! |
| 3561 |
if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask)) |
3561 |
if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask)) |
| 3562 |
return false; |
3562 |
return false; |
| 3563 |
// We should be adding all-ones constant (i.e. subtracting one.) |
3563 |
// We should be adding all-ones constant (i.e. subtracting one.) |
| 3564 |
if (!isAllOnesConstant(Mask->getOperand(1))) |
3564 |
if (!isAllOnesConstant(Mask->getOperand(1))) |
| 3565 |
return false; |
3565 |
return false; |
| 3566 |
// Match `1 << nbits`. Might be truncated. Must only have one use! |
3566 |
// Match `1 << nbits`. Might be truncated. Must only have one use! |
| 3567 |
SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0)); |
3567 |
SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0)); |
| 3568 |
if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) |
3568 |
if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) |
| 3569 |
return false; |
3569 |
return false; |
| 3570 |
if (!isOneConstant(M0->getOperand(0))) |
3570 |
if (!isOneConstant(M0->getOperand(0))) |
| 3571 |
return false; |
3571 |
return false; |
| 3572 |
NBits = M0->getOperand(1); |
3572 |
NBits = M0->getOperand(1); |
| 3573 |
NegateNBits = false; |
3573 |
NegateNBits = false; |
| 3574 |
return true; |
3574 |
return true; |
| 3575 |
}; |
3575 |
}; |
| 3576 |
|
3576 |
|
| 3577 |
auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) { |
3577 |
auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) { |
| 3578 |
V = peekThroughOneUseTruncation(V); |
3578 |
V = peekThroughOneUseTruncation(V); |
| 3579 |
return CurDAG->MaskedValueIsAllOnes( |
3579 |
return CurDAG->MaskedValueIsAllOnes( |
| 3580 |
V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(), |
3580 |
V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(), |
| 3581 |
NVT.getSizeInBits())); |
3581 |
NVT.getSizeInBits())); |
| 3582 |
}; |
3582 |
}; |
| 3583 |
|
3583 |
|
| 3584 |
// b) x & ~(-1 << nbits) |
3584 |
// b) x & ~(-1 << nbits) |
| 3585 |
auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation, |
3585 |
auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation, |
| 3586 |
&NBits, &NegateNBits](SDValue Mask) -> bool { |
3586 |
&NBits, &NegateNBits](SDValue Mask) -> bool { |
| 3587 |
// Match `~()`. Must only have one use! |
3587 |
// Match `~()`. Must only have one use! |
| 3588 |
if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask)) |
3588 |
if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask)) |
| 3589 |
return false; |
3589 |
return false; |
| 3590 |
// The -1 only has to be all-ones for the final Node's NVT. |
3590 |
// The -1 only has to be all-ones for the final Node's NVT. |
| 3591 |
if (!isAllOnes(Mask->getOperand(1))) |
3591 |
if (!isAllOnes(Mask->getOperand(1))) |
| 3592 |
return false; |
3592 |
return false; |
| 3593 |
// Match `-1 << nbits`. Might be truncated. Must only have one use! |
3593 |
// Match `-1 << nbits`. Might be truncated. Must only have one use! |
| 3594 |
SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0)); |
3594 |
SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0)); |
| 3595 |
if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) |
3595 |
if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) |
| 3596 |
return false; |
3596 |
return false; |
| 3597 |
// The -1 only has to be all-ones for the final Node's NVT. |
3597 |
// The -1 only has to be all-ones for the final Node's NVT. |
| 3598 |
if (!isAllOnes(M0->getOperand(0))) |
3598 |
if (!isAllOnes(M0->getOperand(0))) |
| 3599 |
return false; |
3599 |
return false; |
| 3600 |
NBits = M0->getOperand(1); |
3600 |
NBits = M0->getOperand(1); |
| 3601 |
NegateNBits = false; |
3601 |
NegateNBits = false; |
| 3602 |
return true; |
3602 |
return true; |
| 3603 |
}; |
3603 |
}; |
| 3604 |
|
3604 |
|
| 3605 |
// Try to match potentially-truncated shift amount as `(bitwidth - y)`, |
3605 |
// Try to match potentially-truncated shift amount as `(bitwidth - y)`, |
| 3606 |
// or leave the shift amount as-is, but then we'll have to negate it. |
3606 |
// or leave the shift amount as-is, but then we'll have to negate it. |
| 3607 |
auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt, |
3607 |
auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt, |
| 3608 |
unsigned Bitwidth) { |
3608 |
unsigned Bitwidth) { |
| 3609 |
NBits = ShiftAmt; |
3609 |
NBits = ShiftAmt; |
| 3610 |
NegateNBits = true; |
3610 |
NegateNBits = true; |
| 3611 |
// Skip over a truncate of the shift amount, if any. |
3611 |
// Skip over a truncate of the shift amount, if any. |
| 3612 |
if (NBits.getOpcode() == ISD::TRUNCATE) |
3612 |
if (NBits.getOpcode() == ISD::TRUNCATE) |
| 3613 |
NBits = NBits.getOperand(0); |
3613 |
NBits = NBits.getOperand(0); |
| 3614 |
// Try to match the shift amount as (bitwidth - y). It should go away, too. |
3614 |
// Try to match the shift amount as (bitwidth - y). It should go away, too. |
| 3615 |
// If it doesn't match, that's fine, we'll just negate it ourselves. |
3615 |
// If it doesn't match, that's fine, we'll just negate it ourselves. |
| 3616 |
if (NBits.getOpcode() != ISD::SUB) |
3616 |
if (NBits.getOpcode() != ISD::SUB) |
| 3617 |
return; |
3617 |
return; |
| 3618 |
auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0)); |
3618 |
auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0)); |
| 3619 |
if (!V0 || V0->getZExtValue() != Bitwidth) |
3619 |
if (!V0 || V0->getZExtValue() != Bitwidth) |
| 3620 |
return; |
3620 |
return; |
| 3621 |
NBits = NBits.getOperand(1); |
3621 |
NBits = NBits.getOperand(1); |
| 3622 |
NegateNBits = false; |
3622 |
NegateNBits = false; |
| 3623 |
}; |
3623 |
}; |
| 3624 |
|
3624 |
|
| 3625 |
// c) x & (-1 >> z) but then we'll have to subtract z from bitwidth |
3625 |
// c) x & (-1 >> z) but then we'll have to subtract z from bitwidth |
| 3626 |
// or |
3626 |
// or |
| 3627 |
// c) x & (-1 >> (32 - y)) |
3627 |
// c) x & (-1 >> (32 - y)) |
| 3628 |
auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits, |
3628 |
auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits, |
| 3629 |
canonicalizeShiftAmt](SDValue Mask) -> bool { |
3629 |
canonicalizeShiftAmt](SDValue Mask) -> bool { |
| 3630 |
// The mask itself may be truncated. |
3630 |
// The mask itself may be truncated. |
| 3631 |
Mask = peekThroughOneUseTruncation(Mask); |
3631 |
Mask = peekThroughOneUseTruncation(Mask); |
| 3632 |
unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits(); |
3632 |
unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits(); |
| 3633 |
// Match `l>>`. Must only have one use! |
3633 |
// Match `l>>`. Must only have one use! |
| 3634 |
if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask)) |
3634 |
if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask)) |
| 3635 |
return false; |
3635 |
return false; |
| 3636 |
// We should be shifting truly all-ones constant. |
3636 |
// We should be shifting truly all-ones constant. |
| 3637 |
if (!isAllOnesConstant(Mask.getOperand(0))) |
3637 |
if (!isAllOnesConstant(Mask.getOperand(0))) |
| 3638 |
return false; |
3638 |
return false; |
| 3639 |
SDValue M1 = Mask.getOperand(1); |
3639 |
SDValue M1 = Mask.getOperand(1); |
| 3640 |
// The shift amount should not be used externally. |
3640 |
// The shift amount should not be used externally. |
| 3641 |
if (!checkOneUse(M1)) |
3641 |
if (!checkOneUse(M1)) |
| 3642 |
return false; |
3642 |
return false; |
| 3643 |
canonicalizeShiftAmt(M1, Bitwidth); |
3643 |
canonicalizeShiftAmt(M1, Bitwidth); |
| 3644 |
// Pattern c. is non-canonical, and is expanded into pattern d. iff there |
3644 |
// Pattern c. is non-canonical, and is expanded into pattern d. iff there |
| 3645 |
// is no extra use of the mask. Clearly, there was one since we are here. |
3645 |
// is no extra use of the mask. Clearly, there was one since we are here. |
| 3646 |
// But at the same time, if we need to negate the shift amount, |
3646 |
// But at the same time, if we need to negate the shift amount, |
| 3647 |
// then we don't want the mask to stick around, else it's unprofitable. |
3647 |
// then we don't want the mask to stick around, else it's unprofitable. |
| 3648 |
return !NegateNBits; |
3648 |
return !NegateNBits; |
| 3649 |
}; |
3649 |
}; |
| 3650 |
|
3650 |
|
| 3651 |
SDValue X; |
3651 |
SDValue X; |
| 3652 |
|
3652 |
|
| 3653 |
// d) x << z >> z but then we'll have to subtract z from bitwidth |
3653 |
// d) x << z >> z but then we'll have to subtract z from bitwidth |
| 3654 |
// or |
3654 |
// or |
| 3655 |
// d) x << (32 - y) >> (32 - y) |
3655 |
// d) x << (32 - y) >> (32 - y) |
| 3656 |
auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt, |
3656 |
auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt, |
| 3657 |
AllowExtraUsesByDefault, &NegateNBits, |
3657 |
AllowExtraUsesByDefault, &NegateNBits, |
| 3658 |
&X](SDNode *Node) -> bool { |
3658 |
&X](SDNode *Node) -> bool { |
| 3659 |
if (Node->getOpcode() != ISD::SRL) |
3659 |
if (Node->getOpcode() != ISD::SRL) |
| 3660 |
return false; |
3660 |
return false; |
| 3661 |
SDValue N0 = Node->getOperand(0); |
3661 |
SDValue N0 = Node->getOperand(0); |
| 3662 |
if (N0->getOpcode() != ISD::SHL) |
3662 |
if (N0->getOpcode() != ISD::SHL) |
| 3663 |
return false; |
3663 |
return false; |
| 3664 |
unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits(); |
3664 |
unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits(); |
| 3665 |
SDValue N1 = Node->getOperand(1); |
3665 |
SDValue N1 = Node->getOperand(1); |
| 3666 |
SDValue N01 = N0->getOperand(1); |
3666 |
SDValue N01 = N0->getOperand(1); |
| 3667 |
// Both of the shifts must be by the exact same value. |
3667 |
// Both of the shifts must be by the exact same value. |
| 3668 |
if (N1 != N01) |
3668 |
if (N1 != N01) |
| 3669 |
return false; |
3669 |
return false; |
| 3670 |
canonicalizeShiftAmt(N1, Bitwidth); |
3670 |
canonicalizeShiftAmt(N1, Bitwidth); |
| 3671 |
// There should not be any external uses of the inner shift / shift amount. |
3671 |
// There should not be any external uses of the inner shift / shift amount. |
| 3672 |
// Note that while we are generally okay with external uses given BMI2, |
3672 |
// Note that while we are generally okay with external uses given BMI2, |
| 3673 |
// iff we need to negate the shift amount, we are not okay with extra uses. |
3673 |
// iff we need to negate the shift amount, we are not okay with extra uses. |
| 3674 |
const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits; |
3674 |
const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits; |
| 3675 |
if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses)) |
3675 |
if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses)) |
| 3676 |
return false; |
3676 |
return false; |
| 3677 |
X = N0->getOperand(0); |
3677 |
X = N0->getOperand(0); |
| 3678 |
return true; |
3678 |
return true; |
| 3679 |
}; |
3679 |
}; |
| 3680 |
|
3680 |
|
| 3681 |
auto matchLowBitMask = [matchPatternA, matchPatternB, |
3681 |
auto matchLowBitMask = [matchPatternA, matchPatternB, |
| 3682 |
matchPatternC](SDValue Mask) -> bool { |
3682 |
matchPatternC](SDValue Mask) -> bool { |
| 3683 |
return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask); |
3683 |
return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask); |
| 3684 |
}; |
3684 |
}; |
| 3685 |
|
3685 |
|
| 3686 |
if (Node->getOpcode() == ISD::AND) { |
3686 |
if (Node->getOpcode() == ISD::AND) { |
| 3687 |
X = Node->getOperand(0); |
3687 |
X = Node->getOperand(0); |
| 3688 |
SDValue Mask = Node->getOperand(1); |
3688 |
SDValue Mask = Node->getOperand(1); |
| 3689 |
|
3689 |
|
| 3690 |
if (matchLowBitMask(Mask)) { |
3690 |
if (matchLowBitMask(Mask)) { |
| 3691 |
// Great. |
3691 |
// Great. |
| 3692 |
} else { |
3692 |
} else { |
| 3693 |
std::swap(X, Mask); |
3693 |
std::swap(X, Mask); |
| 3694 |
if (!matchLowBitMask(Mask)) |
3694 |
if (!matchLowBitMask(Mask)) |
| 3695 |
return false; |
3695 |
return false; |
| 3696 |
} |
3696 |
} |
| 3697 |
} else if (matchLowBitMask(SDValue(Node, 0))) { |
3697 |
} else if (matchLowBitMask(SDValue(Node, 0))) { |
| 3698 |
X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT); |
3698 |
X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT); |
| 3699 |
} else if (!matchPatternD(Node)) |
3699 |
} else if (!matchPatternD(Node)) |
| 3700 |
return false; |
3700 |
return false; |
| 3701 |
|
3701 |
|
| 3702 |
// If we need to negate the shift amount, require BMI2 BZHI support. |
3702 |
// If we need to negate the shift amount, require BMI2 BZHI support. |
| 3703 |
// It's just too unprofitable for BMI1 BEXTR. |
3703 |
// It's just too unprofitable for BMI1 BEXTR. |
| 3704 |
if (NegateNBits && !Subtarget->hasBMI2()) |
3704 |
if (NegateNBits && !Subtarget->hasBMI2()) |
| 3705 |
return false; |
3705 |
return false; |
| 3706 |
|
3706 |
|
| 3707 |
SDLoc DL(Node); |
3707 |
SDLoc DL(Node); |
| 3708 |
|
3708 |
|
| 3709 |
// Truncate the shift amount. |
3709 |
// Truncate the shift amount. |
| 3710 |
NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits); |
3710 |
NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits); |
| 3711 |
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
3711 |
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
| 3712 |
|
3712 |
|
| 3713 |
// Insert 8-bit NBits into lowest 8 bits of 32-bit register. |
3713 |
// Insert 8-bit NBits into lowest 8 bits of 32-bit register. |
| 3714 |
// All the other bits are undefined, we do not care about them. |
3714 |
// All the other bits are undefined, we do not care about them. |
| 3715 |
SDValue ImplDef = SDValue( |
3715 |
SDValue ImplDef = SDValue( |
| 3716 |
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0); |
3716 |
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0); |
| 3717 |
insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef); |
3717 |
insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef); |
| 3718 |
|
3718 |
|
| 3719 |
SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32); |
3719 |
SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32); |
| 3720 |
insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal); |
3720 |
insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal); |
| 3721 |
NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, |
3721 |
NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, |
| 3722 |
MVT::i32, ImplDef, NBits, SRIdxVal), |
3722 |
MVT::i32, ImplDef, NBits, SRIdxVal), |
| 3723 |
0); |
3723 |
0); |
| 3724 |
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
3724 |
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
| 3725 |
|
3725 |
|
| 3726 |
// We might have matched the amount of high bits to be cleared, |
3726 |
// We might have matched the amount of high bits to be cleared, |
| 3727 |
// but we want the amount of low bits to be kept, so negate it then. |
3727 |
// but we want the amount of low bits to be kept, so negate it then. |
| 3728 |
if (NegateNBits) { |
3728 |
if (NegateNBits) { |
| 3729 |
SDValue BitWidthC = CurDAG->getConstant(NVT.getSizeInBits(), DL, MVT::i32); |
3729 |
SDValue BitWidthC = CurDAG->getConstant(NVT.getSizeInBits(), DL, MVT::i32); |
| 3730 |
insertDAGNode(*CurDAG, SDValue(Node, 0), BitWidthC); |
3730 |
insertDAGNode(*CurDAG, SDValue(Node, 0), BitWidthC); |
| 3731 |
|
3731 |
|
| 3732 |
NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits); |
3732 |
NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits); |
| 3733 |
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
3733 |
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
| 3734 |
} |
3734 |
} |
| 3735 |
|
3735 |
|
| 3736 |
if (Subtarget->hasBMI2()) { |
3736 |
if (Subtarget->hasBMI2()) { |
| 3737 |
// Great, just emit the BZHI.. |
3737 |
// Great, just emit the BZHI.. |
| 3738 |
if (NVT != MVT::i32) { |
3738 |
if (NVT != MVT::i32) { |
| 3739 |
// But have to place the bit count into the wide-enough register first. |
3739 |
// But have to place the bit count into the wide-enough register first. |
| 3740 |
NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits); |
3740 |
NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits); |
| 3741 |
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
3741 |
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
| 3742 |
} |
3742 |
} |
| 3743 |
|
3743 |
|
| 3744 |
SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits); |
3744 |
SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits); |
| 3745 |
ReplaceNode(Node, Extract.getNode()); |
3745 |
ReplaceNode(Node, Extract.getNode()); |
| 3746 |
SelectCode(Extract.getNode()); |
3746 |
SelectCode(Extract.getNode()); |
| 3747 |
return true; |
3747 |
return true; |
| 3748 |
} |
3748 |
} |
| 3749 |
|
3749 |
|
| 3750 |
// Else, if we do *NOT* have BMI2, let's find out if the 'X' is |
3750 |
// Else, if we do *NOT* have BMI2, let's find out if the 'X' is |
| 3751 |
// *logically* shifted (potentially with one-use trunc in between), |
3751 |
// *logically* shifted (potentially with one-use trunc in between), |
| 3752 |
// and the truncation was the only use of the shift, |
3752 |
// and the truncation was the only use of the shift, |
| 3753 |
// and if so look past one-use truncation. |
3753 |
// and if so look past one-use truncation. |
| 3754 |
{ |
3754 |
{ |
| 3755 |
SDValue RealX = peekThroughOneUseTruncation(X); |
3755 |
SDValue RealX = peekThroughOneUseTruncation(X); |
| 3756 |
// FIXME: only if the shift is one-use? |
3756 |
// FIXME: only if the shift is one-use? |
| 3757 |
if (RealX != X && RealX.getOpcode() == ISD::SRL) |
3757 |
if (RealX != X && RealX.getOpcode() == ISD::SRL) |
| 3758 |
X = RealX; |
3758 |
X = RealX; |
| 3759 |
} |
3759 |
} |
| 3760 |
|
3760 |
|
| 3761 |
MVT XVT = X.getSimpleValueType(); |
3761 |
MVT XVT = X.getSimpleValueType(); |
| 3762 |
|
3762 |
|
| 3763 |
// Else, emitting BEXTR requires one more step. |
3763 |
// Else, emitting BEXTR requires one more step. |
| 3764 |
// The 'control' of BEXTR has the pattern of: |
3764 |
// The 'control' of BEXTR has the pattern of: |
| 3765 |
// [15...8 bit][ 7...0 bit] location |
3765 |
// [15...8 bit][ 7...0 bit] location |
| 3766 |
// [ bit count][ shift] name |
3766 |
// [ bit count][ shift] name |
| 3767 |
// I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11 |
3767 |
// I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11 |
| 3768 |
|
3768 |
|
| 3769 |
// Shift NBits left by 8 bits, thus producing 'control'. |
3769 |
// Shift NBits left by 8 bits, thus producing 'control'. |
| 3770 |
// This makes the low 8 bits to be zero. |
3770 |
// This makes the low 8 bits to be zero. |
| 3771 |
SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8); |
3771 |
SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8); |
| 3772 |
insertDAGNode(*CurDAG, SDValue(Node, 0), C8); |
3772 |
insertDAGNode(*CurDAG, SDValue(Node, 0), C8); |
| 3773 |
SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8); |
3773 |
SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8); |
| 3774 |
insertDAGNode(*CurDAG, SDValue(Node, 0), Control); |
3774 |
insertDAGNode(*CurDAG, SDValue(Node, 0), Control); |
| 3775 |
|
3775 |
|
| 3776 |
// If the 'X' is *logically* shifted, we can fold that shift into 'control'. |
3776 |
// If the 'X' is *logically* shifted, we can fold that shift into 'control'. |
| 3777 |
// FIXME: only if the shift is one-use? |
3777 |
// FIXME: only if the shift is one-use? |
| 3778 |
if (X.getOpcode() == ISD::SRL) { |
3778 |
if (X.getOpcode() == ISD::SRL) { |
| 3779 |
SDValue ShiftAmt = X.getOperand(1); |
3779 |
SDValue ShiftAmt = X.getOperand(1); |
| 3780 |
X = X.getOperand(0); |
3780 |
X = X.getOperand(0); |
| 3781 |
|
3781 |
|
| 3782 |
assert(ShiftAmt.getValueType() == MVT::i8 && |
3782 |
assert(ShiftAmt.getValueType() == MVT::i8 && |
| 3783 |
"Expected shift amount to be i8"); |
3783 |
"Expected shift amount to be i8"); |
| 3784 |
|
3784 |
|
| 3785 |
// Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero! |
3785 |
// Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero! |
| 3786 |
// We could zext to i16 in some form, but we intentionally don't do that. |
3786 |
// We could zext to i16 in some form, but we intentionally don't do that. |
| 3787 |
SDValue OrigShiftAmt = ShiftAmt; |
3787 |
SDValue OrigShiftAmt = ShiftAmt; |
| 3788 |
ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt); |
3788 |
ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt); |
| 3789 |
insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt); |
3789 |
insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt); |
| 3790 |
|
3790 |
|
| 3791 |
// And now 'or' these low 8 bits of shift amount into the 'control'. |
3791 |
// And now 'or' these low 8 bits of shift amount into the 'control'. |
| 3792 |
Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt); |
3792 |
Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt); |
| 3793 |
insertDAGNode(*CurDAG, SDValue(Node, 0), Control); |
3793 |
insertDAGNode(*CurDAG, SDValue(Node, 0), Control); |
| 3794 |
} |
3794 |
} |
| 3795 |
|
3795 |
|
| 3796 |
// But have to place the 'control' into the wide-enough register first. |
3796 |
// But have to place the 'control' into the wide-enough register first. |
| 3797 |
if (XVT != MVT::i32) { |
3797 |
if (XVT != MVT::i32) { |
| 3798 |
Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control); |
3798 |
Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control); |
| 3799 |
insertDAGNode(*CurDAG, SDValue(Node, 0), Control); |
3799 |
insertDAGNode(*CurDAG, SDValue(Node, 0), Control); |
| 3800 |
} |
3800 |
} |
| 3801 |
|
3801 |
|
| 3802 |
// And finally, form the BEXTR itself. |
3802 |
// And finally, form the BEXTR itself. |
| 3803 |
SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control); |
3803 |
SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control); |
| 3804 |
|
3804 |
|
| 3805 |
// The 'X' was originally truncated. Do that now. |
3805 |
// The 'X' was originally truncated. Do that now. |
| 3806 |
if (XVT != NVT) { |
3806 |
if (XVT != NVT) { |
| 3807 |
insertDAGNode(*CurDAG, SDValue(Node, 0), Extract); |
3807 |
insertDAGNode(*CurDAG, SDValue(Node, 0), Extract); |
| 3808 |
Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract); |
3808 |
Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract); |
| 3809 |
} |
3809 |
} |
| 3810 |
|
3810 |
|
| 3811 |
ReplaceNode(Node, Extract.getNode()); |
3811 |
ReplaceNode(Node, Extract.getNode()); |
| 3812 |
SelectCode(Extract.getNode()); |
3812 |
SelectCode(Extract.getNode()); |
| 3813 |
|
3813 |
|
| 3814 |
return true; |
3814 |
return true; |
| 3815 |
} |
3815 |
} |
| 3816 |
|
3816 |
|
| 3817 |
// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI. |
3817 |
// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI. |
| 3818 |
MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { |
3818 |
MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { |
| 3819 |
MVT NVT = Node->getSimpleValueType(0); |
3819 |
MVT NVT = Node->getSimpleValueType(0); |
| 3820 |
SDLoc dl(Node); |
3820 |
SDLoc dl(Node); |
| 3821 |
|
3821 |
|
| 3822 |
SDValue N0 = Node->getOperand(0); |
3822 |
SDValue N0 = Node->getOperand(0); |
| 3823 |
SDValue N1 = Node->getOperand(1); |
3823 |
SDValue N1 = Node->getOperand(1); |
| 3824 |
|
3824 |
|
| 3825 |
// If we have TBM we can use an immediate for the control. If we have BMI |
3825 |
// If we have TBM we can use an immediate for the control. If we have BMI |
| 3826 |
// we should only do this if the BEXTR instruction is implemented well. |
3826 |
// we should only do this if the BEXTR instruction is implemented well. |
| 3827 |
// Otherwise moving the control into a register makes this more costly. |
3827 |
// Otherwise moving the control into a register makes this more costly. |
| 3828 |
// TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM |
3828 |
// TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM |
| 3829 |
// hoisting the move immediate would make it worthwhile with a less optimal |
3829 |
// hoisting the move immediate would make it worthwhile with a less optimal |
| 3830 |
// BEXTR? |
3830 |
// BEXTR? |
| 3831 |
bool PreferBEXTR = |
3831 |
bool PreferBEXTR = |
| 3832 |
Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR()); |
3832 |
Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR()); |
| 3833 |
if (!PreferBEXTR && !Subtarget->hasBMI2()) |
3833 |
if (!PreferBEXTR && !Subtarget->hasBMI2()) |
| 3834 |
return nullptr; |
3834 |
return nullptr; |
| 3835 |
|
3835 |
|
| 3836 |
// Must have a shift right. |
3836 |
// Must have a shift right. |
| 3837 |
if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA) |
3837 |
if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA) |
| 3838 |
return nullptr; |
3838 |
return nullptr; |
| 3839 |
|
3839 |
|
| 3840 |
// Shift can't have additional users. |
3840 |
// Shift can't have additional users. |
| 3841 |
if (!N0->hasOneUse()) |
3841 |
if (!N0->hasOneUse()) |
| 3842 |
return nullptr; |
3842 |
return nullptr; |
| 3843 |
|
3843 |
|
| 3844 |
// Only supported for 32 and 64 bits. |
3844 |
// Only supported for 32 and 64 bits. |
| 3845 |
if (NVT != MVT::i32 && NVT != MVT::i64) |
3845 |
if (NVT != MVT::i32 && NVT != MVT::i64) |
| 3846 |
return nullptr; |
3846 |
return nullptr; |
| 3847 |
|
3847 |
|
| 3848 |
// Shift amount and RHS of and must be constant. |
3848 |
// Shift amount and RHS of and must be constant. |
| 3849 |
auto *MaskCst = dyn_cast(N1); |
3849 |
auto *MaskCst = dyn_cast(N1); |
| 3850 |
auto *ShiftCst = dyn_cast(N0->getOperand(1)); |
3850 |
auto *ShiftCst = dyn_cast(N0->getOperand(1)); |
| 3851 |
if (!MaskCst || !ShiftCst) |
3851 |
if (!MaskCst || !ShiftCst) |
| 3852 |
return nullptr; |
3852 |
return nullptr; |
| 3853 |
|
3853 |
|
| 3854 |
// And RHS must be a mask. |
3854 |
// And RHS must be a mask. |
| 3855 |
uint64_t Mask = MaskCst->getZExtValue(); |
3855 |
uint64_t Mask = MaskCst->getZExtValue(); |
| 3856 |
if (!isMask_64(Mask)) |
3856 |
if (!isMask_64(Mask)) |
| 3857 |
return nullptr; |
3857 |
return nullptr; |
| 3858 |
|
3858 |
|
| 3859 |
uint64_t Shift = ShiftCst->getZExtValue(); |
3859 |
uint64_t Shift = ShiftCst->getZExtValue(); |
| 3860 |
uint64_t MaskSize = llvm::popcount(Mask); |
3860 |
uint64_t MaskSize = llvm::popcount(Mask); |
| 3861 |
|
3861 |
|
| 3862 |
// Don't interfere with something that can be handled by extracting AH. |
3862 |
// Don't interfere with something that can be handled by extracting AH. |
| 3863 |
// TODO: If we are able to fold a load, BEXTR might still be better than AH. |
3863 |
// TODO: If we are able to fold a load, BEXTR might still be better than AH. |
| 3864 |
if (Shift == 8 && MaskSize == 8) |
3864 |
if (Shift == 8 && MaskSize == 8) |
| 3865 |
return nullptr; |
3865 |
return nullptr; |
| 3866 |
|
3866 |
|
| 3867 |
// Make sure we are only using bits that were in the original value, not |
3867 |
// Make sure we are only using bits that were in the original value, not |
| 3868 |
// shifted in. |
3868 |
// shifted in. |
| 3869 |
if (Shift + MaskSize > NVT.getSizeInBits()) |
3869 |
if (Shift + MaskSize > NVT.getSizeInBits()) |
| 3870 |
return nullptr; |
3870 |
return nullptr; |
| 3871 |
|
3871 |
|
| 3872 |
// BZHI, if available, is always fast, unlike BEXTR. But even if we decide |
3872 |
// BZHI, if available, is always fast, unlike BEXTR. But even if we decide |
| 3873 |
// that we can't use BEXTR, it is only worthwhile using BZHI if the mask |
3873 |
// that we can't use BEXTR, it is only worthwhile using BZHI if the mask |
| 3874 |
// does not fit into 32 bits. Load folding is not a sufficient reason. |
3874 |
// does not fit into 32 bits. Load folding is not a sufficient reason. |
| 3875 |
if (!PreferBEXTR && MaskSize <= 32) |
3875 |
if (!PreferBEXTR && MaskSize <= 32) |
| 3876 |
return nullptr; |
3876 |
return nullptr; |
| 3877 |
|
3877 |
|
| 3878 |
SDValue Control; |
3878 |
SDValue Control; |
| 3879 |
unsigned ROpc, MOpc; |
3879 |
unsigned ROpc, MOpc; |
| 3880 |
|
3880 |
|
| 3881 |
if (!PreferBEXTR) { |
3881 |
if (!PreferBEXTR) { |
| 3882 |
assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then."); |
3882 |
assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then."); |
| 3883 |
// If we can't make use of BEXTR then we can't fuse shift+mask stages. |
3883 |
// If we can't make use of BEXTR then we can't fuse shift+mask stages. |
| 3884 |
// Let's perform the mask first, and apply shift later. Note that we need to |
3884 |
// Let's perform the mask first, and apply shift later. Note that we need to |
| 3885 |
// widen the mask to account for the fact that we'll apply shift afterwards! |
3885 |
// widen the mask to account for the fact that we'll apply shift afterwards! |
| 3886 |
Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT); |
3886 |
Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT); |
| 3887 |
ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr; |
3887 |
ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr; |
| 3888 |
MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm; |
3888 |
MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm; |
| 3889 |
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; |
3889 |
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; |
| 3890 |
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); |
3890 |
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); |
| 3891 |
} else { |
3891 |
} else { |
| 3892 |
// The 'control' of BEXTR has the pattern of: |
3892 |
// The 'control' of BEXTR has the pattern of: |
| 3893 |
// [15...8 bit][ 7...0 bit] location |
3893 |
// [15...8 bit][ 7...0 bit] location |
| 3894 |
// [ bit count][ shift] name |
3894 |
// [ bit count][ shift] name |
| 3895 |
// I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11 |
3895 |
// I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11 |
| 3896 |
Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT); |
3896 |
Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT); |
| 3897 |
if (Subtarget->hasTBM()) { |
3897 |
if (Subtarget->hasTBM()) { |
| 3898 |
ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri; |
3898 |
ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri; |
| 3899 |
MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi; |
3899 |
MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi; |
| 3900 |
} else { |
3900 |
} else { |
| 3901 |
assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then."); |
3901 |
assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then."); |
| 3902 |
// BMI requires the immediate to placed in a register. |
3902 |
// BMI requires the immediate to placed in a register. |
| 3903 |
ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr; |
3903 |
ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr; |
| 3904 |
MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm; |
3904 |
MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm; |
| 3905 |
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; |
3905 |
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; |
| 3906 |
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); |
3906 |
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); |
| 3907 |
} |
3907 |
} |
| 3908 |
} |
3908 |
} |
| 3909 |
|
3909 |
|
| 3910 |
MachineSDNode *NewNode; |
3910 |
MachineSDNode *NewNode; |
| 3911 |
SDValue Input = N0->getOperand(0); |
3911 |
SDValue Input = N0->getOperand(0); |
| 3912 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
3912 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 3913 |
if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
3913 |
if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
| 3914 |
SDValue Ops[] = { |
3914 |
SDValue Ops[] = { |
| 3915 |
Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)}; |
3915 |
Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)}; |
| 3916 |
SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); |
3916 |
SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); |
| 3917 |
NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
3917 |
NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
| 3918 |
// Update the chain. |
3918 |
// Update the chain. |
| 3919 |
ReplaceUses(Input.getValue(1), SDValue(NewNode, 2)); |
3919 |
ReplaceUses(Input.getValue(1), SDValue(NewNode, 2)); |
| 3920 |
// Record the mem-refs |
3920 |
// Record the mem-refs |
| 3921 |
CurDAG->setNodeMemRefs(NewNode, {cast(Input)->getMemOperand()}); |
3921 |
CurDAG->setNodeMemRefs(NewNode, {cast(Input)->getMemOperand()}); |
| 3922 |
} else { |
3922 |
} else { |
| 3923 |
NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control); |
3923 |
NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control); |
| 3924 |
} |
3924 |
} |
| 3925 |
|
3925 |
|
| 3926 |
if (!PreferBEXTR) { |
3926 |
if (!PreferBEXTR) { |
| 3927 |
// We still need to apply the shift. |
3927 |
// We still need to apply the shift. |
| 3928 |
SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT); |
3928 |
SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT); |
| 3929 |
unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri; |
3929 |
unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri; |
| 3930 |
NewNode = |
3930 |
NewNode = |
| 3931 |
CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt); |
3931 |
CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt); |
| 3932 |
} |
3932 |
} |
| 3933 |
|
3933 |
|
| 3934 |
return NewNode; |
3934 |
return NewNode; |
| 3935 |
} |
3935 |
} |
| 3936 |
|
3936 |
|
| 3937 |
// Emit a PCMISTR(I/M) instruction. |
3937 |
// Emit a PCMISTR(I/M) instruction. |
| 3938 |
MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc, |
3938 |
MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc, |
| 3939 |
bool MayFoldLoad, const SDLoc &dl, |
3939 |
bool MayFoldLoad, const SDLoc &dl, |
| 3940 |
MVT VT, SDNode *Node) { |
3940 |
MVT VT, SDNode *Node) { |
| 3941 |
SDValue N0 = Node->getOperand(0); |
3941 |
SDValue N0 = Node->getOperand(0); |
| 3942 |
SDValue N1 = Node->getOperand(1); |
3942 |
SDValue N1 = Node->getOperand(1); |
| 3943 |
SDValue Imm = Node->getOperand(2); |
3943 |
SDValue Imm = Node->getOperand(2); |
| 3944 |
auto *Val = cast(Imm)->getConstantIntValue(); |
3944 |
auto *Val = cast(Imm)->getConstantIntValue(); |
| 3945 |
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType()); |
3945 |
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType()); |
| 3946 |
|
3946 |
|
| 3947 |
// Try to fold a load. No need to check alignment. |
3947 |
// Try to fold a load. No need to check alignment. |
| 3948 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
3948 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 3949 |
if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
3949 |
if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
| 3950 |
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, |
3950 |
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, |
| 3951 |
N1.getOperand(0) }; |
3951 |
N1.getOperand(0) }; |
| 3952 |
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other); |
3952 |
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other); |
| 3953 |
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
3953 |
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
| 3954 |
// Update the chain. |
3954 |
// Update the chain. |
| 3955 |
ReplaceUses(N1.getValue(1), SDValue(CNode, 2)); |
3955 |
ReplaceUses(N1.getValue(1), SDValue(CNode, 2)); |
| 3956 |
// Record the mem-refs |
3956 |
// Record the mem-refs |
| 3957 |
CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); |
3957 |
CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); |
| 3958 |
return CNode; |
3958 |
return CNode; |
| 3959 |
} |
3959 |
} |
| 3960 |
|
3960 |
|
| 3961 |
SDValue Ops[] = { N0, N1, Imm }; |
3961 |
SDValue Ops[] = { N0, N1, Imm }; |
| 3962 |
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32); |
3962 |
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32); |
| 3963 |
MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops); |
3963 |
MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops); |
| 3964 |
return CNode; |
3964 |
return CNode; |
| 3965 |
} |
3965 |
} |
| 3966 |
|
3966 |
|
| 3967 |
// Emit a PCMESTR(I/M) instruction. Also return the Glue result in case we need |
3967 |
// Emit a PCMESTR(I/M) instruction. Also return the Glue result in case we need |
| 3968 |
// to emit a second instruction after this one. This is needed since we have two |
3968 |
// to emit a second instruction after this one. This is needed since we have two |
| 3969 |
// copyToReg nodes glued before this and we need to continue that glue through. |
3969 |
// copyToReg nodes glued before this and we need to continue that glue through. |
| 3970 |
MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc, |
3970 |
MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc, |
| 3971 |
bool MayFoldLoad, const SDLoc &dl, |
3971 |
bool MayFoldLoad, const SDLoc &dl, |
| 3972 |
MVT VT, SDNode *Node, |
3972 |
MVT VT, SDNode *Node, |
| 3973 |
SDValue &InGlue) { |
3973 |
SDValue &InGlue) { |
| 3974 |
SDValue N0 = Node->getOperand(0); |
3974 |
SDValue N0 = Node->getOperand(0); |
| 3975 |
SDValue N2 = Node->getOperand(2); |
3975 |
SDValue N2 = Node->getOperand(2); |
| 3976 |
SDValue Imm = Node->getOperand(4); |
3976 |
SDValue Imm = Node->getOperand(4); |
| 3977 |
auto *Val = cast(Imm)->getConstantIntValue(); |
3977 |
auto *Val = cast(Imm)->getConstantIntValue(); |
| 3978 |
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType()); |
3978 |
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType()); |
| 3979 |
|
3979 |
|
| 3980 |
// Try to fold a load. No need to check alignment. |
3980 |
// Try to fold a load. No need to check alignment. |
| 3981 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
3981 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 3982 |
if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
3982 |
if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
| 3983 |
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, |
3983 |
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, |
| 3984 |
N2.getOperand(0), InGlue }; |
3984 |
N2.getOperand(0), InGlue }; |
| 3985 |
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue); |
3985 |
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue); |
| 3986 |
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
3986 |
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
| 3987 |
InGlue = SDValue(CNode, 3); |
3987 |
InGlue = SDValue(CNode, 3); |
| 3988 |
// Update the chain. |
3988 |
// Update the chain. |
| 3989 |
ReplaceUses(N2.getValue(1), SDValue(CNode, 2)); |
3989 |
ReplaceUses(N2.getValue(1), SDValue(CNode, 2)); |
| 3990 |
// Record the mem-refs |
3990 |
// Record the mem-refs |
| 3991 |
CurDAG->setNodeMemRefs(CNode, {cast(N2)->getMemOperand()}); |
3991 |
CurDAG->setNodeMemRefs(CNode, {cast(N2)->getMemOperand()}); |
| 3992 |
return CNode; |
3992 |
return CNode; |
| 3993 |
} |
3993 |
} |
| 3994 |
|
3994 |
|
| 3995 |
SDValue Ops[] = { N0, N2, Imm, InGlue }; |
3995 |
SDValue Ops[] = { N0, N2, Imm, InGlue }; |
| 3996 |
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue); |
3996 |
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue); |
| 3997 |
MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops); |
3997 |
MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops); |
| 3998 |
InGlue = SDValue(CNode, 2); |
3998 |
InGlue = SDValue(CNode, 2); |
| 3999 |
return CNode; |
3999 |
return CNode; |
| 4000 |
} |
4000 |
} |
| 4001 |
|
4001 |
|
| 4002 |
bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) { |
4002 |
bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) { |
| 4003 |
EVT VT = N->getValueType(0); |
4003 |
EVT VT = N->getValueType(0); |
| 4004 |
|
4004 |
|
| 4005 |
// Only handle scalar shifts. |
4005 |
// Only handle scalar shifts. |
| 4006 |
if (VT.isVector()) |
4006 |
if (VT.isVector()) |
| 4007 |
return false; |
4007 |
return false; |
| 4008 |
|
4008 |
|
| 4009 |
// Narrower shifts only mask to 5 bits in hardware. |
4009 |
// Narrower shifts only mask to 5 bits in hardware. |
| 4010 |
unsigned Size = VT == MVT::i64 ? 64 : 32; |
4010 |
unsigned Size = VT == MVT::i64 ? 64 : 32; |
| 4011 |
|
4011 |
|
| 4012 |
SDValue OrigShiftAmt = N->getOperand(1); |
4012 |
SDValue OrigShiftAmt = N->getOperand(1); |
| 4013 |
SDValue ShiftAmt = OrigShiftAmt; |
4013 |
SDValue ShiftAmt = OrigShiftAmt; |
| 4014 |
SDLoc DL(N); |
4014 |
SDLoc DL(N); |
| 4015 |
|
4015 |
|
| 4016 |
// Skip over a truncate of the shift amount. |
4016 |
// Skip over a truncate of the shift amount. |
| 4017 |
if (ShiftAmt->getOpcode() == ISD::TRUNCATE) |
4017 |
if (ShiftAmt->getOpcode() == ISD::TRUNCATE) |
| 4018 |
ShiftAmt = ShiftAmt->getOperand(0); |
4018 |
ShiftAmt = ShiftAmt->getOperand(0); |
| 4019 |
|
4019 |
|
| 4020 |
// This function is called after X86DAGToDAGISel::matchBitExtract(), |
4020 |
// This function is called after X86DAGToDAGISel::matchBitExtract(), |
| 4021 |
// so we are not afraid that we might mess up BZHI/BEXTR pattern. |
4021 |
// so we are not afraid that we might mess up BZHI/BEXTR pattern. |
| 4022 |
|
4022 |
|
| 4023 |
SDValue NewShiftAmt; |
4023 |
SDValue NewShiftAmt; |
| 4024 |
if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB || |
4024 |
if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB || |
| 4025 |
ShiftAmt->getOpcode() == ISD::XOR) { |
4025 |
ShiftAmt->getOpcode() == ISD::XOR) { |
| 4026 |
SDValue Add0 = ShiftAmt->getOperand(0); |
4026 |
SDValue Add0 = ShiftAmt->getOperand(0); |
| 4027 |
SDValue Add1 = ShiftAmt->getOperand(1); |
4027 |
SDValue Add1 = ShiftAmt->getOperand(1); |
| 4028 |
auto *Add0C = dyn_cast(Add0); |
4028 |
auto *Add0C = dyn_cast(Add0); |
| 4029 |
auto *Add1C = dyn_cast(Add1); |
4029 |
auto *Add1C = dyn_cast(Add1); |
| 4030 |
// If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X |
4030 |
// If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X |
| 4031 |
// to avoid the ADD/SUB/XOR. |
4031 |
// to avoid the ADD/SUB/XOR. |
| 4032 |
if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) { |
4032 |
if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) { |
| 4033 |
NewShiftAmt = Add0; |
4033 |
NewShiftAmt = Add0; |
| 4034 |
|
4034 |
|
| 4035 |
} else if (ShiftAmt->getOpcode() != ISD::ADD && ShiftAmt.hasOneUse() && |
4035 |
} else if (ShiftAmt->getOpcode() != ISD::ADD && ShiftAmt.hasOneUse() && |
| 4036 |
((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) || |
4036 |
((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) || |
| 4037 |
(Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) { |
4037 |
(Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) { |
| 4038 |
// If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X |
4038 |
// If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X |
| 4039 |
// we can replace it with a NOT. In the XOR case it may save some code |
4039 |
// we can replace it with a NOT. In the XOR case it may save some code |
| 4040 |
// size, in the SUB case it also may save a move. |
4040 |
// size, in the SUB case it also may save a move. |
| 4041 |
assert(Add0C == nullptr || Add1C == nullptr); |
4041 |
assert(Add0C == nullptr || Add1C == nullptr); |
| 4042 |
|
4042 |
|
| 4043 |
// We can only do N-X, not X-N |
4043 |
// We can only do N-X, not X-N |
| 4044 |
if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr) |
4044 |
if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr) |
| 4045 |
return false; |
4045 |
return false; |
| 4046 |
|
4046 |
|
| 4047 |
EVT OpVT = ShiftAmt.getValueType(); |
4047 |
EVT OpVT = ShiftAmt.getValueType(); |
| 4048 |
|
4048 |
|
| 4049 |
SDValue AllOnes = CurDAG->getAllOnesConstant(DL, OpVT); |
4049 |
SDValue AllOnes = CurDAG->getAllOnesConstant(DL, OpVT); |
| 4050 |
NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT, |
4050 |
NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT, |
| 4051 |
Add0C == nullptr ? Add0 : Add1, AllOnes); |
4051 |
Add0C == nullptr ? Add0 : Add1, AllOnes); |
| 4052 |
insertDAGNode(*CurDAG, OrigShiftAmt, AllOnes); |
4052 |
insertDAGNode(*CurDAG, OrigShiftAmt, AllOnes); |
| 4053 |
insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); |
4053 |
insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); |
| 4054 |
// If we are shifting by N-X where N == 0 mod Size, then just shift by |
4054 |
// If we are shifting by N-X where N == 0 mod Size, then just shift by |
| 4055 |
// -X to generate a NEG instead of a SUB of a constant. |
4055 |
// -X to generate a NEG instead of a SUB of a constant. |
| 4056 |
} else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C && |
4056 |
} else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C && |
| 4057 |
Add0C->getZExtValue() != 0) { |
4057 |
Add0C->getZExtValue() != 0) { |
| 4058 |
EVT SubVT = ShiftAmt.getValueType(); |
4058 |
EVT SubVT = ShiftAmt.getValueType(); |
| 4059 |
SDValue X; |
4059 |
SDValue X; |
| 4060 |
if (Add0C->getZExtValue() % Size == 0) |
4060 |
if (Add0C->getZExtValue() % Size == 0) |
| 4061 |
X = Add1; |
4061 |
X = Add1; |
| 4062 |
else if (ShiftAmt.hasOneUse() && Size == 64 && |
4062 |
else if (ShiftAmt.hasOneUse() && Size == 64 && |
| 4063 |
Add0C->getZExtValue() % 32 == 0) { |
4063 |
Add0C->getZExtValue() % 32 == 0) { |
| 4064 |
// We have a 64-bit shift by (n*32-x), turn it into -(x+n*32). |
4064 |
// We have a 64-bit shift by (n*32-x), turn it into -(x+n*32). |
| 4065 |
// This is mainly beneficial if we already compute (x+n*32). |
4065 |
// This is mainly beneficial if we already compute (x+n*32). |
| 4066 |
if (Add1.getOpcode() == ISD::TRUNCATE) { |
4066 |
if (Add1.getOpcode() == ISD::TRUNCATE) { |
| 4067 |
Add1 = Add1.getOperand(0); |
4067 |
Add1 = Add1.getOperand(0); |
| 4068 |
SubVT = Add1.getValueType(); |
4068 |
SubVT = Add1.getValueType(); |
| 4069 |
} |
4069 |
} |
| 4070 |
if (Add0.getValueType() != SubVT) { |
4070 |
if (Add0.getValueType() != SubVT) { |
| 4071 |
Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT); |
4071 |
Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT); |
| 4072 |
insertDAGNode(*CurDAG, OrigShiftAmt, Add0); |
4072 |
insertDAGNode(*CurDAG, OrigShiftAmt, Add0); |
| 4073 |
} |
4073 |
} |
| 4074 |
|
4074 |
|
| 4075 |
X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0); |
4075 |
X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0); |
| 4076 |
insertDAGNode(*CurDAG, OrigShiftAmt, X); |
4076 |
insertDAGNode(*CurDAG, OrigShiftAmt, X); |
| 4077 |
} else |
4077 |
} else |
| 4078 |
return false; |
4078 |
return false; |
| 4079 |
// Insert a negate op. |
4079 |
// Insert a negate op. |
| 4080 |
// TODO: This isn't guaranteed to replace the sub if there is a logic cone |
4080 |
// TODO: This isn't guaranteed to replace the sub if there is a logic cone |
| 4081 |
// that uses it that's not a shift. |
4081 |
// that uses it that's not a shift. |
| 4082 |
SDValue Zero = CurDAG->getConstant(0, DL, SubVT); |
4082 |
SDValue Zero = CurDAG->getConstant(0, DL, SubVT); |
| 4083 |
SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X); |
4083 |
SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X); |
| 4084 |
NewShiftAmt = Neg; |
4084 |
NewShiftAmt = Neg; |
| 4085 |
|
4085 |
|
| 4086 |
// Insert these operands into a valid topological order so they can |
4086 |
// Insert these operands into a valid topological order so they can |
| 4087 |
// get selected independently. |
4087 |
// get selected independently. |
| 4088 |
insertDAGNode(*CurDAG, OrigShiftAmt, Zero); |
4088 |
insertDAGNode(*CurDAG, OrigShiftAmt, Zero); |
| 4089 |
insertDAGNode(*CurDAG, OrigShiftAmt, Neg); |
4089 |
insertDAGNode(*CurDAG, OrigShiftAmt, Neg); |
| 4090 |
} else |
4090 |
} else |
| 4091 |
return false; |
4091 |
return false; |
| 4092 |
} else |
4092 |
} else |
| 4093 |
return false; |
4093 |
return false; |
| 4094 |
|
4094 |
|
| 4095 |
if (NewShiftAmt.getValueType() != MVT::i8) { |
4095 |
if (NewShiftAmt.getValueType() != MVT::i8) { |
| 4096 |
// Need to truncate the shift amount. |
4096 |
// Need to truncate the shift amount. |
| 4097 |
NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt); |
4097 |
NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt); |
| 4098 |
// Add to a correct topological ordering. |
4098 |
// Add to a correct topological ordering. |
| 4099 |
insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); |
4099 |
insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); |
| 4100 |
} |
4100 |
} |
| 4101 |
|
4101 |
|
| 4102 |
// Insert a new mask to keep the shift amount legal. This should be removed |
4102 |
// Insert a new mask to keep the shift amount legal. This should be removed |
| 4103 |
// by isel patterns. |
4103 |
// by isel patterns. |
| 4104 |
NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt, |
4104 |
NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt, |
| 4105 |
CurDAG->getConstant(Size - 1, DL, MVT::i8)); |
4105 |
CurDAG->getConstant(Size - 1, DL, MVT::i8)); |
| 4106 |
// Place in a correct topological ordering. |
4106 |
// Place in a correct topological ordering. |
| 4107 |
insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); |
4107 |
insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); |
| 4108 |
|
4108 |
|
| 4109 |
SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0), |
4109 |
SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0), |
| 4110 |
NewShiftAmt); |
4110 |
NewShiftAmt); |
| 4111 |
if (UpdatedNode != N) { |
4111 |
if (UpdatedNode != N) { |
| 4112 |
// If we found an existing node, we should replace ourselves with that node |
4112 |
// If we found an existing node, we should replace ourselves with that node |
| 4113 |
// and wait for it to be selected after its other users. |
4113 |
// and wait for it to be selected after its other users. |
| 4114 |
ReplaceNode(N, UpdatedNode); |
4114 |
ReplaceNode(N, UpdatedNode); |
| 4115 |
return true; |
4115 |
return true; |
| 4116 |
} |
4116 |
} |
| 4117 |
|
4117 |
|
| 4118 |
// If the original shift amount is now dead, delete it so that we don't run |
4118 |
// If the original shift amount is now dead, delete it so that we don't run |
| 4119 |
// it through isel. |
4119 |
// it through isel. |
| 4120 |
if (OrigShiftAmt.getNode()->use_empty()) |
4120 |
if (OrigShiftAmt.getNode()->use_empty()) |
| 4121 |
CurDAG->RemoveDeadNode(OrigShiftAmt.getNode()); |
4121 |
CurDAG->RemoveDeadNode(OrigShiftAmt.getNode()); |
| 4122 |
|
4122 |
|
| 4123 |
// Now that we've optimized the shift amount, defer to normal isel to get |
4123 |
// Now that we've optimized the shift amount, defer to normal isel to get |
| 4124 |
// load folding and legacy vs BMI2 selection without repeating it here. |
4124 |
// load folding and legacy vs BMI2 selection without repeating it here. |
| 4125 |
SelectCode(N); |
4125 |
SelectCode(N); |
| 4126 |
return true; |
4126 |
return true; |
| 4127 |
} |
4127 |
} |
| 4128 |
|
4128 |
|
| 4129 |
bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { |
4129 |
bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { |
| 4130 |
MVT NVT = N->getSimpleValueType(0); |
4130 |
MVT NVT = N->getSimpleValueType(0); |
| 4131 |
unsigned Opcode = N->getOpcode(); |
4131 |
unsigned Opcode = N->getOpcode(); |
| 4132 |
SDLoc dl(N); |
4132 |
SDLoc dl(N); |
| 4133 |
|
4133 |
|
| 4134 |
// For operations of the form (x << C1) op C2, check if we can use a smaller |
4134 |
// For operations of the form (x << C1) op C2, check if we can use a smaller |
| 4135 |
// encoding for C2 by transforming it into (x op (C2>>C1)) << C1. |
4135 |
// encoding for C2 by transforming it into (x op (C2>>C1)) << C1. |
| 4136 |
SDValue Shift = N->getOperand(0); |
4136 |
SDValue Shift = N->getOperand(0); |
| 4137 |
SDValue N1 = N->getOperand(1); |
4137 |
SDValue N1 = N->getOperand(1); |
| 4138 |
|
4138 |
|
| 4139 |
auto *Cst = dyn_cast(N1); |
4139 |
auto *Cst = dyn_cast(N1); |
| 4140 |
if (!Cst) |
4140 |
if (!Cst) |
| 4141 |
return false; |
4141 |
return false; |
| 4142 |
|
4142 |
|
| 4143 |
int64_t Val = Cst->getSExtValue(); |
4143 |
int64_t Val = Cst->getSExtValue(); |
| 4144 |
|
4144 |
|
| 4145 |
// If we have an any_extend feeding the AND, look through it to see if there |
4145 |
// If we have an any_extend feeding the AND, look through it to see if there |
| 4146 |
// is a shift behind it. But only if the AND doesn't use the extended bits. |
4146 |
// is a shift behind it. But only if the AND doesn't use the extended bits. |
| 4147 |
// FIXME: Generalize this to other ANY_EXTEND than i32 to i64? |
4147 |
// FIXME: Generalize this to other ANY_EXTEND than i32 to i64? |
| 4148 |
bool FoundAnyExtend = false; |
4148 |
bool FoundAnyExtend = false; |
| 4149 |
if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && |
4149 |
if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && |
| 4150 |
Shift.getOperand(0).getSimpleValueType() == MVT::i32 && |
4150 |
Shift.getOperand(0).getSimpleValueType() == MVT::i32 && |
| 4151 |
isUInt<32>(Val)) { |
4151 |
isUInt<32>(Val)) { |
| 4152 |
FoundAnyExtend = true; |
4152 |
FoundAnyExtend = true; |
| 4153 |
Shift = Shift.getOperand(0); |
4153 |
Shift = Shift.getOperand(0); |
| 4154 |
} |
4154 |
} |
| 4155 |
|
4155 |
|
| 4156 |
if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) |
4156 |
if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) |
| 4157 |
return false; |
4157 |
return false; |
| 4158 |
|
4158 |
|
| 4159 |
// i8 is unshrinkable, i16 should be promoted to i32. |
4159 |
// i8 is unshrinkable, i16 should be promoted to i32. |
| 4160 |
if (NVT != MVT::i32 && NVT != MVT::i64) |
4160 |
if (NVT != MVT::i32 && NVT != MVT::i64) |
| 4161 |
return false; |
4161 |
return false; |
| 4162 |
|
4162 |
|
| 4163 |
auto *ShlCst = dyn_cast(Shift.getOperand(1)); |
4163 |
auto *ShlCst = dyn_cast(Shift.getOperand(1)); |
| 4164 |
if (!ShlCst) |
4164 |
if (!ShlCst) |
| 4165 |
return false; |
4165 |
return false; |
| 4166 |
|
4166 |
|
| 4167 |
uint64_t ShAmt = ShlCst->getZExtValue(); |
4167 |
uint64_t ShAmt = ShlCst->getZExtValue(); |
| 4168 |
|
4168 |
|
| 4169 |
// Make sure that we don't change the operation by removing bits. |
4169 |
// Make sure that we don't change the operation by removing bits. |
| 4170 |
// This only matters for OR and XOR, AND is unaffected. |
4170 |
// This only matters for OR and XOR, AND is unaffected. |
| 4171 |
uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1; |
4171 |
uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1; |
| 4172 |
if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) |
4172 |
if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) |
| 4173 |
return false; |
4173 |
return false; |
| 4174 |
|
4174 |
|
| 4175 |
// Check the minimum bitwidth for the new constant. |
4175 |
// Check the minimum bitwidth for the new constant. |
| 4176 |
// TODO: Using 16 and 8 bit operations is also possible for or32 & xor32. |
4176 |
// TODO: Using 16 and 8 bit operations is also possible for or32 & xor32. |
| 4177 |
auto CanShrinkImmediate = [&](int64_t &ShiftedVal) { |
4177 |
auto CanShrinkImmediate = [&](int64_t &ShiftedVal) { |
| 4178 |
if (Opcode == ISD::AND) { |
4178 |
if (Opcode == ISD::AND) { |
| 4179 |
// AND32ri is the same as AND64ri32 with zext imm. |
4179 |
// AND32ri is the same as AND64ri32 with zext imm. |
| 4180 |
// Try this before sign extended immediates below. |
4180 |
// Try this before sign extended immediates below. |
| 4181 |
ShiftedVal = (uint64_t)Val >> ShAmt; |
4181 |
ShiftedVal = (uint64_t)Val >> ShAmt; |
| 4182 |
if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal)) |
4182 |
if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal)) |
| 4183 |
return true; |
4183 |
return true; |
| 4184 |
// Also swap order when the AND can become MOVZX. |
4184 |
// Also swap order when the AND can become MOVZX. |
| 4185 |
if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX) |
4185 |
if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX) |
| 4186 |
return true; |
4186 |
return true; |
| 4187 |
} |
4187 |
} |
| 4188 |
ShiftedVal = Val >> ShAmt; |
4188 |
ShiftedVal = Val >> ShAmt; |
| 4189 |
if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) || |
4189 |
if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) || |
| 4190 |
(!isInt<32>(Val) && isInt<32>(ShiftedVal))) |
4190 |
(!isInt<32>(Val) && isInt<32>(ShiftedVal))) |
| 4191 |
return true; |
4191 |
return true; |
| 4192 |
if (Opcode != ISD::AND) { |
4192 |
if (Opcode != ISD::AND) { |
| 4193 |
// MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr |
4193 |
// MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr |
| 4194 |
ShiftedVal = (uint64_t)Val >> ShAmt; |
4194 |
ShiftedVal = (uint64_t)Val >> ShAmt; |
| 4195 |
if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal)) |
4195 |
if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal)) |
| 4196 |
return true; |
4196 |
return true; |
| 4197 |
} |
4197 |
} |
| 4198 |
return false; |
4198 |
return false; |
| 4199 |
}; |
4199 |
}; |
| 4200 |
|
4200 |
|
| 4201 |
int64_t ShiftedVal; |
4201 |
int64_t ShiftedVal; |
| 4202 |
if (!CanShrinkImmediate(ShiftedVal)) |
4202 |
if (!CanShrinkImmediate(ShiftedVal)) |
| 4203 |
return false; |
4203 |
return false; |
| 4204 |
|
4204 |
|
| 4205 |
// Ok, we can reorder to get a smaller immediate. |
4205 |
// Ok, we can reorder to get a smaller immediate. |
| 4206 |
|
4206 |
|
| 4207 |
// But, its possible the original immediate allowed an AND to become MOVZX. |
4207 |
// But, its possible the original immediate allowed an AND to become MOVZX. |
| 4208 |
// Doing this late due to avoid the MakedValueIsZero call as late as |
4208 |
// Doing this late due to avoid the MakedValueIsZero call as late as |
| 4209 |
// possible. |
4209 |
// possible. |
| 4210 |
if (Opcode == ISD::AND) { |
4210 |
if (Opcode == ISD::AND) { |
| 4211 |
// Find the smallest zext this could possibly be. |
4211 |
// Find the smallest zext this could possibly be. |
| 4212 |
unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits(); |
4212 |
unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits(); |
| 4213 |
ZExtWidth = llvm::bit_ceil(std::max(ZExtWidth, 8U)); |
4213 |
ZExtWidth = llvm::bit_ceil(std::max(ZExtWidth, 8U)); |
| 4214 |
|
4214 |
|
| 4215 |
// Figure out which bits need to be zero to achieve that mask. |
4215 |
// Figure out which bits need to be zero to achieve that mask. |
| 4216 |
APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(), |
4216 |
APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(), |
| 4217 |
ZExtWidth); |
4217 |
ZExtWidth); |
| 4218 |
NeededMask &= ~Cst->getAPIntValue(); |
4218 |
NeededMask &= ~Cst->getAPIntValue(); |
| 4219 |
|
4219 |
|
| 4220 |
if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask)) |
4220 |
if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask)) |
| 4221 |
return false; |
4221 |
return false; |
| 4222 |
} |
4222 |
} |
| 4223 |
|
4223 |
|
| 4224 |
SDValue X = Shift.getOperand(0); |
4224 |
SDValue X = Shift.getOperand(0); |
| 4225 |
if (FoundAnyExtend) { |
4225 |
if (FoundAnyExtend) { |
| 4226 |
SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X); |
4226 |
SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X); |
| 4227 |
insertDAGNode(*CurDAG, SDValue(N, 0), NewX); |
4227 |
insertDAGNode(*CurDAG, SDValue(N, 0), NewX); |
| 4228 |
X = NewX; |
4228 |
X = NewX; |
| 4229 |
} |
4229 |
} |
| 4230 |
|
4230 |
|
| 4231 |
SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT); |
4231 |
SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT); |
| 4232 |
insertDAGNode(*CurDAG, SDValue(N, 0), NewCst); |
4232 |
insertDAGNode(*CurDAG, SDValue(N, 0), NewCst); |
| 4233 |
SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst); |
4233 |
SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst); |
| 4234 |
insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp); |
4234 |
insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp); |
| 4235 |
SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp, |
4235 |
SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp, |
| 4236 |
Shift.getOperand(1)); |
4236 |
Shift.getOperand(1)); |
| 4237 |
ReplaceNode(N, NewSHL.getNode()); |
4237 |
ReplaceNode(N, NewSHL.getNode()); |
| 4238 |
SelectCode(NewSHL.getNode()); |
4238 |
SelectCode(NewSHL.getNode()); |
| 4239 |
return true; |
4239 |
return true; |
| 4240 |
} |
4240 |
} |
| 4241 |
|
4241 |
|
| 4242 |
bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA, |
4242 |
bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA, |
| 4243 |
SDNode *ParentB, SDNode *ParentC, |
4243 |
SDNode *ParentB, SDNode *ParentC, |
| 4244 |
SDValue A, SDValue B, SDValue C, |
4244 |
SDValue A, SDValue B, SDValue C, |
| 4245 |
uint8_t Imm) { |
4245 |
uint8_t Imm) { |
| 4246 |
assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) && |
4246 |
assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) && |
| 4247 |
C.isOperandOf(ParentC) && "Incorrect parent node"); |
4247 |
C.isOperandOf(ParentC) && "Incorrect parent node"); |
| 4248 |
|
4248 |
|
| 4249 |
auto tryFoldLoadOrBCast = |
4249 |
auto tryFoldLoadOrBCast = |
| 4250 |
[this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale, |
4250 |
[this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale, |
| 4251 |
SDValue &Index, SDValue &Disp, SDValue &Segment) { |
4251 |
SDValue &Index, SDValue &Disp, SDValue &Segment) { |
| 4252 |
if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment)) |
4252 |
if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment)) |
| 4253 |
return true; |
4253 |
return true; |
| 4254 |
|
4254 |
|
| 4255 |
// Not a load, check for broadcast which may be behind a bitcast. |
4255 |
// Not a load, check for broadcast which may be behind a bitcast. |
| 4256 |
if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) { |
4256 |
if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) { |
| 4257 |
P = L.getNode(); |
4257 |
P = L.getNode(); |
| 4258 |
L = L.getOperand(0); |
4258 |
L = L.getOperand(0); |
| 4259 |
} |
4259 |
} |
| 4260 |
|
4260 |
|
| 4261 |
if (L.getOpcode() != X86ISD::VBROADCAST_LOAD) |
4261 |
if (L.getOpcode() != X86ISD::VBROADCAST_LOAD) |
| 4262 |
return false; |
4262 |
return false; |
| 4263 |
|
4263 |
|
| 4264 |
// Only 32 and 64 bit broadcasts are supported. |
4264 |
// Only 32 and 64 bit broadcasts are supported. |
| 4265 |
auto *MemIntr = cast(L); |
4265 |
auto *MemIntr = cast(L); |
| 4266 |
unsigned Size = MemIntr->getMemoryVT().getSizeInBits(); |
4266 |
unsigned Size = MemIntr->getMemoryVT().getSizeInBits(); |
| 4267 |
if (Size != 32 && Size != 64) |
4267 |
if (Size != 32 && Size != 64) |
| 4268 |
return false; |
4268 |
return false; |
| 4269 |
|
4269 |
|
| 4270 |
return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment); |
4270 |
return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment); |
| 4271 |
}; |
4271 |
}; |
| 4272 |
|
4272 |
|
| 4273 |
bool FoldedLoad = false; |
4273 |
bool FoldedLoad = false; |
| 4274 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
4274 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 4275 |
if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
4275 |
if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
| 4276 |
FoldedLoad = true; |
4276 |
FoldedLoad = true; |
| 4277 |
} else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3, |
4277 |
} else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3, |
| 4278 |
Tmp4)) { |
4278 |
Tmp4)) { |
| 4279 |
FoldedLoad = true; |
4279 |
FoldedLoad = true; |
| 4280 |
std::swap(A, C); |
4280 |
std::swap(A, C); |
| 4281 |
// Swap bits 1/4 and 3/6. |
4281 |
// Swap bits 1/4 and 3/6. |
| 4282 |
uint8_t OldImm = Imm; |
4282 |
uint8_t OldImm = Imm; |
| 4283 |
Imm = OldImm & 0xa5; |
4283 |
Imm = OldImm & 0xa5; |
| 4284 |
if (OldImm & 0x02) Imm |= 0x10; |
4284 |
if (OldImm & 0x02) Imm |= 0x10; |
| 4285 |
if (OldImm & 0x10) Imm |= 0x02; |
4285 |
if (OldImm & 0x10) Imm |= 0x02; |
| 4286 |
if (OldImm & 0x08) Imm |= 0x40; |
4286 |
if (OldImm & 0x08) Imm |= 0x40; |
| 4287 |
if (OldImm & 0x40) Imm |= 0x08; |
4287 |
if (OldImm & 0x40) Imm |= 0x08; |
| 4288 |
} else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3, |
4288 |
} else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3, |
| 4289 |
Tmp4)) { |
4289 |
Tmp4)) { |
| 4290 |
FoldedLoad = true; |
4290 |
FoldedLoad = true; |
| 4291 |
std::swap(B, C); |
4291 |
std::swap(B, C); |
| 4292 |
// Swap bits 1/2 and 5/6. |
4292 |
// Swap bits 1/2 and 5/6. |
| 4293 |
uint8_t OldImm = Imm; |
4293 |
uint8_t OldImm = Imm; |
| 4294 |
Imm = OldImm & 0x99; |
4294 |
Imm = OldImm & 0x99; |
| 4295 |
if (OldImm & 0x02) Imm |= 0x04; |
4295 |
if (OldImm & 0x02) Imm |= 0x04; |
| 4296 |
if (OldImm & 0x04) Imm |= 0x02; |
4296 |
if (OldImm & 0x04) Imm |= 0x02; |
| 4297 |
if (OldImm & 0x20) Imm |= 0x40; |
4297 |
if (OldImm & 0x20) Imm |= 0x40; |
| 4298 |
if (OldImm & 0x40) Imm |= 0x20; |
4298 |
if (OldImm & 0x40) Imm |= 0x20; |
| 4299 |
} |
4299 |
} |
| 4300 |
|
4300 |
|
| 4301 |
SDLoc DL(Root); |
4301 |
SDLoc DL(Root); |
| 4302 |
|
4302 |
|
| 4303 |
SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8); |
4303 |
SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8); |
| 4304 |
|
4304 |
|
| 4305 |
MVT NVT = Root->getSimpleValueType(0); |
4305 |
MVT NVT = Root->getSimpleValueType(0); |
| 4306 |
|
4306 |
|
| 4307 |
MachineSDNode *MNode; |
4307 |
MachineSDNode *MNode; |
| 4308 |
if (FoldedLoad) { |
4308 |
if (FoldedLoad) { |
| 4309 |
SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other); |
4309 |
SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other); |
| 4310 |
|
4310 |
|
| 4311 |
unsigned Opc; |
4311 |
unsigned Opc; |
| 4312 |
if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) { |
4312 |
if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) { |
| 4313 |
auto *MemIntr = cast(C); |
4313 |
auto *MemIntr = cast(C); |
| 4314 |
unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits(); |
4314 |
unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits(); |
| 4315 |
assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!"); |
4315 |
assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!"); |
| 4316 |
|
4316 |
|
| 4317 |
bool UseD = EltSize == 32; |
4317 |
bool UseD = EltSize == 32; |
| 4318 |
if (NVT.is128BitVector()) |
4318 |
if (NVT.is128BitVector()) |
| 4319 |
Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi; |
4319 |
Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi; |
| 4320 |
else if (NVT.is256BitVector()) |
4320 |
else if (NVT.is256BitVector()) |
| 4321 |
Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi; |
4321 |
Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi; |
| 4322 |
else if (NVT.is512BitVector()) |
4322 |
else if (NVT.is512BitVector()) |
| 4323 |
Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi; |
4323 |
Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi; |
| 4324 |
else |
4324 |
else |
| 4325 |
llvm_unreachable("Unexpected vector size!"); |
4325 |
llvm_unreachable("Unexpected vector size!"); |
| 4326 |
} else { |
4326 |
} else { |
| 4327 |
bool UseD = NVT.getVectorElementType() == MVT::i32; |
4327 |
bool UseD = NVT.getVectorElementType() == MVT::i32; |
| 4328 |
if (NVT.is128BitVector()) |
4328 |
if (NVT.is128BitVector()) |
| 4329 |
Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi; |
4329 |
Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi; |
| 4330 |
else if (NVT.is256BitVector()) |
4330 |
else if (NVT.is256BitVector()) |
| 4331 |
Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi; |
4331 |
Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi; |
| 4332 |
else if (NVT.is512BitVector()) |
4332 |
else if (NVT.is512BitVector()) |
| 4333 |
Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi; |
4333 |
Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi; |
| 4334 |
else |
4334 |
else |
| 4335 |
llvm_unreachable("Unexpected vector size!"); |
4335 |
llvm_unreachable("Unexpected vector size!"); |
| 4336 |
} |
4336 |
} |
| 4337 |
|
4337 |
|
| 4338 |
SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)}; |
4338 |
SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)}; |
| 4339 |
MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops); |
4339 |
MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops); |
| 4340 |
|
4340 |
|
| 4341 |
// Update the chain. |
4341 |
// Update the chain. |
| 4342 |
ReplaceUses(C.getValue(1), SDValue(MNode, 1)); |
4342 |
ReplaceUses(C.getValue(1), SDValue(MNode, 1)); |
| 4343 |
// Record the mem-refs |
4343 |
// Record the mem-refs |
| 4344 |
CurDAG->setNodeMemRefs(MNode, {cast(C)->getMemOperand()}); |
4344 |
CurDAG->setNodeMemRefs(MNode, {cast(C)->getMemOperand()}); |
| 4345 |
} else { |
4345 |
} else { |
| 4346 |
bool UseD = NVT.getVectorElementType() == MVT::i32; |
4346 |
bool UseD = NVT.getVectorElementType() == MVT::i32; |
| 4347 |
unsigned Opc; |
4347 |
unsigned Opc; |
| 4348 |
if (NVT.is128BitVector()) |
4348 |
if (NVT.is128BitVector()) |
| 4349 |
Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri; |
4349 |
Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri; |
| 4350 |
else if (NVT.is256BitVector()) |
4350 |
else if (NVT.is256BitVector()) |
| 4351 |
Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri; |
4351 |
Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri; |
| 4352 |
else if (NVT.is512BitVector()) |
4352 |
else if (NVT.is512BitVector()) |
| 4353 |
Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri; |
4353 |
Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri; |
| 4354 |
else |
4354 |
else |
| 4355 |
llvm_unreachable("Unexpected vector size!"); |
4355 |
llvm_unreachable("Unexpected vector size!"); |
| 4356 |
|
4356 |
|
| 4357 |
MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm}); |
4357 |
MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm}); |
| 4358 |
} |
4358 |
} |
| 4359 |
|
4359 |
|
| 4360 |
ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0)); |
4360 |
ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0)); |
| 4361 |
CurDAG->RemoveDeadNode(Root); |
4361 |
CurDAG->RemoveDeadNode(Root); |
| 4362 |
return true; |
4362 |
return true; |
| 4363 |
} |
4363 |
} |
| 4364 |
|
4364 |
|
| 4365 |
// Try to match two logic ops to a VPTERNLOG. |
4365 |
// Try to match two logic ops to a VPTERNLOG. |
| 4366 |
// FIXME: Handle more complex patterns that use an operand more than once? |
4366 |
// FIXME: Handle more complex patterns that use an operand more than once? |
| 4367 |
bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { |
4367 |
bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { |
| 4368 |
MVT NVT = N->getSimpleValueType(0); |
4368 |
MVT NVT = N->getSimpleValueType(0); |
| 4369 |
|
4369 |
|
| 4370 |
// Make sure we support VPTERNLOG. |
4370 |
// Make sure we support VPTERNLOG. |
| 4371 |
if (!NVT.isVector() || !Subtarget->hasAVX512() || |
4371 |
if (!NVT.isVector() || !Subtarget->hasAVX512() || |
| 4372 |
NVT.getVectorElementType() == MVT::i1) |
4372 |
NVT.getVectorElementType() == MVT::i1) |
| 4373 |
return false; |
4373 |
return false; |
| 4374 |
|
4374 |
|
| 4375 |
// We need VLX for 128/256-bit. |
4375 |
// We need VLX for 128/256-bit. |
| 4376 |
if (!(Subtarget->hasVLX() || NVT.is512BitVector())) |
4376 |
if (!(Subtarget->hasVLX() || NVT.is512BitVector())) |
| 4377 |
return false; |
4377 |
return false; |
| 4378 |
|
4378 |
|
| 4379 |
SDValue N0 = N->getOperand(0); |
4379 |
SDValue N0 = N->getOperand(0); |
| 4380 |
SDValue N1 = N->getOperand(1); |
4380 |
SDValue N1 = N->getOperand(1); |
| 4381 |
|
4381 |
|
| 4382 |
auto getFoldableLogicOp = [](SDValue Op) { |
4382 |
auto getFoldableLogicOp = [](SDValue Op) { |
| 4383 |
// Peek through single use bitcast. |
4383 |
// Peek through single use bitcast. |
| 4384 |
if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) |
4384 |
if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) |
| 4385 |
Op = Op.getOperand(0); |
4385 |
Op = Op.getOperand(0); |
| 4386 |
|
4386 |
|
| 4387 |
if (!Op.hasOneUse()) |
4387 |
if (!Op.hasOneUse()) |
| 4388 |
return SDValue(); |
4388 |
return SDValue(); |
| 4389 |
|
4389 |
|
| 4390 |
unsigned Opc = Op.getOpcode(); |
4390 |
unsigned Opc = Op.getOpcode(); |
| 4391 |
if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR || |
4391 |
if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR || |
| 4392 |
Opc == X86ISD::ANDNP) |
4392 |
Opc == X86ISD::ANDNP) |
| 4393 |
return Op; |
4393 |
return Op; |
| 4394 |
|
4394 |
|
| 4395 |
return SDValue(); |
4395 |
return SDValue(); |
| 4396 |
}; |
4396 |
}; |
| 4397 |
|
4397 |
|
| 4398 |
SDValue A, FoldableOp; |
4398 |
SDValue A, FoldableOp; |
| 4399 |
if ((FoldableOp = getFoldableLogicOp(N1))) { |
4399 |
if ((FoldableOp = getFoldableLogicOp(N1))) { |
| 4400 |
A = N0; |
4400 |
A = N0; |
| 4401 |
} else if ((FoldableOp = getFoldableLogicOp(N0))) { |
4401 |
} else if ((FoldableOp = getFoldableLogicOp(N0))) { |
| 4402 |
A = N1; |
4402 |
A = N1; |
| 4403 |
} else |
4403 |
} else |
| 4404 |
return false; |
4404 |
return false; |
| 4405 |
|
4405 |
|
| 4406 |
SDValue B = FoldableOp.getOperand(0); |
4406 |
SDValue B = FoldableOp.getOperand(0); |
| 4407 |
SDValue C = FoldableOp.getOperand(1); |
4407 |
SDValue C = FoldableOp.getOperand(1); |
| 4408 |
SDNode *ParentA = N; |
4408 |
SDNode *ParentA = N; |
| 4409 |
SDNode *ParentB = FoldableOp.getNode(); |
4409 |
SDNode *ParentB = FoldableOp.getNode(); |
| 4410 |
SDNode *ParentC = FoldableOp.getNode(); |
4410 |
SDNode *ParentC = FoldableOp.getNode(); |
| 4411 |
|
4411 |
|
| 4412 |
// We can build the appropriate control immediate by performing the logic |
4412 |
// We can build the appropriate control immediate by performing the logic |
| 4413 |
// operation we're matching using these constants for A, B, and C. |
4413 |
// operation we're matching using these constants for A, B, and C. |
| 4414 |
uint8_t TernlogMagicA = 0xf0; |
4414 |
uint8_t TernlogMagicA = 0xf0; |
| 4415 |
uint8_t TernlogMagicB = 0xcc; |
4415 |
uint8_t TernlogMagicB = 0xcc; |
| 4416 |
uint8_t TernlogMagicC = 0xaa; |
4416 |
uint8_t TernlogMagicC = 0xaa; |
| 4417 |
|
4417 |
|
| 4418 |
// Some of the inputs may be inverted, peek through them and invert the |
4418 |
// Some of the inputs may be inverted, peek through them and invert the |
| 4419 |
// magic values accordingly. |
4419 |
// magic values accordingly. |
| 4420 |
// TODO: There may be a bitcast before the xor that we should peek through. |
4420 |
// TODO: There may be a bitcast before the xor that we should peek through. |
| 4421 |
auto PeekThroughNot = [](SDValue &Op, SDNode *&Parent, uint8_t &Magic) { |
4421 |
auto PeekThroughNot = [](SDValue &Op, SDNode *&Parent, uint8_t &Magic) { |
| 4422 |
if (Op.getOpcode() == ISD::XOR && Op.hasOneUse() && |
4422 |
if (Op.getOpcode() == ISD::XOR && Op.hasOneUse() && |
| 4423 |
ISD::isBuildVectorAllOnes(Op.getOperand(1).getNode())) { |
4423 |
ISD::isBuildVectorAllOnes(Op.getOperand(1).getNode())) { |
| 4424 |
Magic = ~Magic; |
4424 |
Magic = ~Magic; |
| 4425 |
Parent = Op.getNode(); |
4425 |
Parent = Op.getNode(); |
| 4426 |
Op = Op.getOperand(0); |
4426 |
Op = Op.getOperand(0); |
| 4427 |
} |
4427 |
} |
| 4428 |
}; |
4428 |
}; |
| 4429 |
|
4429 |
|
| 4430 |
PeekThroughNot(A, ParentA, TernlogMagicA); |
4430 |
PeekThroughNot(A, ParentA, TernlogMagicA); |
| 4431 |
PeekThroughNot(B, ParentB, TernlogMagicB); |
4431 |
PeekThroughNot(B, ParentB, TernlogMagicB); |
| 4432 |
PeekThroughNot(C, ParentC, TernlogMagicC); |
4432 |
PeekThroughNot(C, ParentC, TernlogMagicC); |
| 4433 |
|
4433 |
|
| 4434 |
uint8_t Imm; |
4434 |
uint8_t Imm; |
| 4435 |
switch (FoldableOp.getOpcode()) { |
4435 |
switch (FoldableOp.getOpcode()) { |
| 4436 |
default: llvm_unreachable("Unexpected opcode!"); |
4436 |
default: llvm_unreachable("Unexpected opcode!"); |
| 4437 |
case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break; |
4437 |
case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break; |
| 4438 |
case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break; |
4438 |
case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break; |
| 4439 |
case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break; |
4439 |
case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break; |
| 4440 |
case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break; |
4440 |
case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break; |
| 4441 |
} |
4441 |
} |
| 4442 |
|
4442 |
|
| 4443 |
switch (N->getOpcode()) { |
4443 |
switch (N->getOpcode()) { |
| 4444 |
default: llvm_unreachable("Unexpected opcode!"); |
4444 |
default: llvm_unreachable("Unexpected opcode!"); |
| 4445 |
case X86ISD::ANDNP: |
4445 |
case X86ISD::ANDNP: |
| 4446 |
if (A == N0) |
4446 |
if (A == N0) |
| 4447 |
Imm &= ~TernlogMagicA; |
4447 |
Imm &= ~TernlogMagicA; |
| 4448 |
else |
4448 |
else |
| 4449 |
Imm = ~(Imm) & TernlogMagicA; |
4449 |
Imm = ~(Imm) & TernlogMagicA; |
| 4450 |
break; |
4450 |
break; |
| 4451 |
case ISD::AND: Imm &= TernlogMagicA; break; |
4451 |
case ISD::AND: Imm &= TernlogMagicA; break; |
| 4452 |
case ISD::OR: Imm |= TernlogMagicA; break; |
4452 |
case ISD::OR: Imm |= TernlogMagicA; break; |
| 4453 |
case ISD::XOR: Imm ^= TernlogMagicA; break; |
4453 |
case ISD::XOR: Imm ^= TernlogMagicA; break; |
| 4454 |
} |
4454 |
} |
| 4455 |
|
4455 |
|
| 4456 |
return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm); |
4456 |
return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm); |
| 4457 |
} |
4457 |
} |
| 4458 |
|
4458 |
|
| 4459 |
/// If the high bits of an 'and' operand are known zero, try setting the |
4459 |
/// If the high bits of an 'and' operand are known zero, try setting the |
| 4460 |
/// high bits of an 'and' constant operand to produce a smaller encoding by |
4460 |
/// high bits of an 'and' constant operand to produce a smaller encoding by |
| 4461 |
/// creating a small, sign-extended negative immediate rather than a large |
4461 |
/// creating a small, sign-extended negative immediate rather than a large |
| 4462 |
/// positive one. This reverses a transform in SimplifyDemandedBits that |
4462 |
/// positive one. This reverses a transform in SimplifyDemandedBits that |
| 4463 |
/// shrinks mask constants by clearing bits. There is also a possibility that |
4463 |
/// shrinks mask constants by clearing bits. There is also a possibility that |
| 4464 |
/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that |
4464 |
/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that |
| 4465 |
/// case, just replace the 'and'. Return 'true' if the node is replaced. |
4465 |
/// case, just replace the 'and'. Return 'true' if the node is replaced. |
| 4466 |
bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) { |
4466 |
bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) { |
| 4467 |
// i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't |
4467 |
// i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't |
| 4468 |
// have immediate operands. |
4468 |
// have immediate operands. |
| 4469 |
MVT VT = And->getSimpleValueType(0); |
4469 |
MVT VT = And->getSimpleValueType(0); |
| 4470 |
if (VT != MVT::i32 && VT != MVT::i64) |
4470 |
if (VT != MVT::i32 && VT != MVT::i64) |
| 4471 |
return false; |
4471 |
return false; |
| 4472 |
|
4472 |
|
| 4473 |
auto *And1C = dyn_cast(And->getOperand(1)); |
4473 |
auto *And1C = dyn_cast(And->getOperand(1)); |
| 4474 |
if (!And1C) |
4474 |
if (!And1C) |
| 4475 |
return false; |
4475 |
return false; |
| 4476 |
|
4476 |
|
| 4477 |
// Bail out if the mask constant is already negative. It's can't shrink more. |
4477 |
// Bail out if the mask constant is already negative. It's can't shrink more. |
| 4478 |
// If the upper 32 bits of a 64 bit mask are all zeros, we have special isel |
4478 |
// If the upper 32 bits of a 64 bit mask are all zeros, we have special isel |
| 4479 |
// patterns to use a 32-bit and instead of a 64-bit and by relying on the |
4479 |
// patterns to use a 32-bit and instead of a 64-bit and by relying on the |
| 4480 |
// implicit zeroing of 32 bit ops. So we should check if the lower 32 bits |
4480 |
// implicit zeroing of 32 bit ops. So we should check if the lower 32 bits |
| 4481 |
// are negative too. |
4481 |
// are negative too. |
| 4482 |
APInt MaskVal = And1C->getAPIntValue(); |
4482 |
APInt MaskVal = And1C->getAPIntValue(); |
| 4483 |
unsigned MaskLZ = MaskVal.countl_zero(); |
4483 |
unsigned MaskLZ = MaskVal.countl_zero(); |
| 4484 |
if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32)) |
4484 |
if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32)) |
| 4485 |
return false; |
4485 |
return false; |
| 4486 |
|
4486 |
|
| 4487 |
// Don't extend into the upper 32 bits of a 64 bit mask. |
4487 |
// Don't extend into the upper 32 bits of a 64 bit mask. |
| 4488 |
if (VT == MVT::i64 && MaskLZ >= 32) { |
4488 |
if (VT == MVT::i64 && MaskLZ >= 32) { |
| 4489 |
MaskLZ -= 32; |
4489 |
MaskLZ -= 32; |
| 4490 |
MaskVal = MaskVal.trunc(32); |
4490 |
MaskVal = MaskVal.trunc(32); |
| 4491 |
} |
4491 |
} |
| 4492 |
|
4492 |
|
| 4493 |
SDValue And0 = And->getOperand(0); |
4493 |
SDValue And0 = And->getOperand(0); |
| 4494 |
APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ); |
4494 |
APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ); |
| 4495 |
APInt NegMaskVal = MaskVal | HighZeros; |
4495 |
APInt NegMaskVal = MaskVal | HighZeros; |
| 4496 |
|
4496 |
|
| 4497 |
// If a negative constant would not allow a smaller encoding, there's no need |
4497 |
// If a negative constant would not allow a smaller encoding, there's no need |
| 4498 |
// to continue. Only change the constant when we know it's a win. |
4498 |
// to continue. Only change the constant when we know it's a win. |
| 4499 |
unsigned MinWidth = NegMaskVal.getSignificantBits(); |
4499 |
unsigned MinWidth = NegMaskVal.getSignificantBits(); |
| 4500 |
if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getSignificantBits() <= 32)) |
4500 |
if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getSignificantBits() <= 32)) |
| 4501 |
return false; |
4501 |
return false; |
| 4502 |
|
4502 |
|
| 4503 |
// Extend masks if we truncated above. |
4503 |
// Extend masks if we truncated above. |
| 4504 |
if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) { |
4504 |
if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) { |
| 4505 |
NegMaskVal = NegMaskVal.zext(64); |
4505 |
NegMaskVal = NegMaskVal.zext(64); |
| 4506 |
HighZeros = HighZeros.zext(64); |
4506 |
HighZeros = HighZeros.zext(64); |
| 4507 |
} |
4507 |
} |
| 4508 |
|
4508 |
|
| 4509 |
// The variable operand must be all zeros in the top bits to allow using the |
4509 |
// The variable operand must be all zeros in the top bits to allow using the |
| 4510 |
// new, negative constant as the mask. |
4510 |
// new, negative constant as the mask. |
| 4511 |
if (!CurDAG->MaskedValueIsZero(And0, HighZeros)) |
4511 |
if (!CurDAG->MaskedValueIsZero(And0, HighZeros)) |
| 4512 |
return false; |
4512 |
return false; |
| 4513 |
|
4513 |
|
| 4514 |
// Check if the mask is -1. In that case, this is an unnecessary instruction |
4514 |
// Check if the mask is -1. In that case, this is an unnecessary instruction |
| 4515 |
// that escaped earlier analysis. |
4515 |
// that escaped earlier analysis. |
| 4516 |
if (NegMaskVal.isAllOnes()) { |
4516 |
if (NegMaskVal.isAllOnes()) { |
| 4517 |
ReplaceNode(And, And0.getNode()); |
4517 |
ReplaceNode(And, And0.getNode()); |
| 4518 |
return true; |
4518 |
return true; |
| 4519 |
} |
4519 |
} |
| 4520 |
|
4520 |
|
| 4521 |
// A negative mask allows a smaller encoding. Create a new 'and' node. |
4521 |
// A negative mask allows a smaller encoding. Create a new 'and' node. |
| 4522 |
SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT); |
4522 |
SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT); |
| 4523 |
insertDAGNode(*CurDAG, SDValue(And, 0), NewMask); |
4523 |
insertDAGNode(*CurDAG, SDValue(And, 0), NewMask); |
| 4524 |
SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask); |
4524 |
SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask); |
| 4525 |
ReplaceNode(And, NewAnd.getNode()); |
4525 |
ReplaceNode(And, NewAnd.getNode()); |
| 4526 |
SelectCode(NewAnd.getNode()); |
4526 |
SelectCode(NewAnd.getNode()); |
| 4527 |
return true; |
4527 |
return true; |
| 4528 |
} |
4528 |
} |
| 4529 |
|
4529 |
|
| 4530 |
static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad, |
4530 |
static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad, |
| 4531 |
bool FoldedBCast, bool Masked) { |
4531 |
bool FoldedBCast, bool Masked) { |
| 4532 |
#define VPTESTM_CASE(VT, SUFFIX) \ |
4532 |
#define VPTESTM_CASE(VT, SUFFIX) \ |
| 4533 |
case MVT::VT: \ |
4533 |
case MVT::VT: \ |
| 4534 |
if (Masked) \ |
4534 |
if (Masked) \ |
| 4535 |
return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \ |
4535 |
return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \ |
| 4536 |
return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX; |
4536 |
return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX; |
| 4537 |
|
4537 |
|
| 4538 |
|
4538 |
|
| 4539 |
#define VPTESTM_BROADCAST_CASES(SUFFIX) \ |
4539 |
#define VPTESTM_BROADCAST_CASES(SUFFIX) \ |
| 4540 |
default: llvm_unreachable("Unexpected VT!"); \ |
4540 |
default: llvm_unreachable("Unexpected VT!"); \ |
| 4541 |
VPTESTM_CASE(v4i32, DZ128##SUFFIX) \ |
4541 |
VPTESTM_CASE(v4i32, DZ128##SUFFIX) \ |
| 4542 |
VPTESTM_CASE(v2i64, QZ128##SUFFIX) \ |
4542 |
VPTESTM_CASE(v2i64, QZ128##SUFFIX) \ |
| 4543 |
VPTESTM_CASE(v8i32, DZ256##SUFFIX) \ |
4543 |
VPTESTM_CASE(v8i32, DZ256##SUFFIX) \ |
| 4544 |
VPTESTM_CASE(v4i64, QZ256##SUFFIX) \ |
4544 |
VPTESTM_CASE(v4i64, QZ256##SUFFIX) \ |
| 4545 |
VPTESTM_CASE(v16i32, DZ##SUFFIX) \ |
4545 |
VPTESTM_CASE(v16i32, DZ##SUFFIX) \ |
| 4546 |
VPTESTM_CASE(v8i64, QZ##SUFFIX) |
4546 |
VPTESTM_CASE(v8i64, QZ##SUFFIX) |
| 4547 |
|
4547 |
|
| 4548 |
#define VPTESTM_FULL_CASES(SUFFIX) \ |
4548 |
#define VPTESTM_FULL_CASES(SUFFIX) \ |
| 4549 |
VPTESTM_BROADCAST_CASES(SUFFIX) \ |
4549 |
VPTESTM_BROADCAST_CASES(SUFFIX) \ |
| 4550 |
VPTESTM_CASE(v16i8, BZ128##SUFFIX) \ |
4550 |
VPTESTM_CASE(v16i8, BZ128##SUFFIX) \ |
| 4551 |
VPTESTM_CASE(v8i16, WZ128##SUFFIX) \ |
4551 |
VPTESTM_CASE(v8i16, WZ128##SUFFIX) \ |
| 4552 |
VPTESTM_CASE(v32i8, BZ256##SUFFIX) \ |
4552 |
VPTESTM_CASE(v32i8, BZ256##SUFFIX) \ |
| 4553 |
VPTESTM_CASE(v16i16, WZ256##SUFFIX) \ |
4553 |
VPTESTM_CASE(v16i16, WZ256##SUFFIX) \ |
| 4554 |
VPTESTM_CASE(v64i8, BZ##SUFFIX) \ |
4554 |
VPTESTM_CASE(v64i8, BZ##SUFFIX) \ |
| 4555 |
VPTESTM_CASE(v32i16, WZ##SUFFIX) |
4555 |
VPTESTM_CASE(v32i16, WZ##SUFFIX) |
| 4556 |
|
4556 |
|
| 4557 |
if (FoldedBCast) { |
4557 |
if (FoldedBCast) { |
| 4558 |
switch (TestVT.SimpleTy) { |
4558 |
switch (TestVT.SimpleTy) { |
| 4559 |
VPTESTM_BROADCAST_CASES(rmb) |
4559 |
VPTESTM_BROADCAST_CASES(rmb) |
| 4560 |
} |
4560 |
} |
| 4561 |
} |
4561 |
} |
| 4562 |
|
4562 |
|
| 4563 |
if (FoldedLoad) { |
4563 |
if (FoldedLoad) { |
| 4564 |
switch (TestVT.SimpleTy) { |
4564 |
switch (TestVT.SimpleTy) { |
| 4565 |
VPTESTM_FULL_CASES(rm) |
4565 |
VPTESTM_FULL_CASES(rm) |
| 4566 |
} |
4566 |
} |
| 4567 |
} |
4567 |
} |
| 4568 |
|
4568 |
|
| 4569 |
switch (TestVT.SimpleTy) { |
4569 |
switch (TestVT.SimpleTy) { |
| 4570 |
VPTESTM_FULL_CASES(rr) |
4570 |
VPTESTM_FULL_CASES(rr) |
| 4571 |
} |
4571 |
} |
| 4572 |
|
4572 |
|
| 4573 |
#undef VPTESTM_FULL_CASES |
4573 |
#undef VPTESTM_FULL_CASES |
| 4574 |
#undef VPTESTM_BROADCAST_CASES |
4574 |
#undef VPTESTM_BROADCAST_CASES |
| 4575 |
#undef VPTESTM_CASE |
4575 |
#undef VPTESTM_CASE |
| 4576 |
} |
4576 |
} |
| 4577 |
|
4577 |
|
| 4578 |
// Try to create VPTESTM instruction. If InMask is not null, it will be used |
4578 |
// Try to create VPTESTM instruction. If InMask is not null, it will be used |
| 4579 |
// to form a masked operation. |
4579 |
// to form a masked operation. |
| 4580 |
bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, |
4580 |
bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, |
| 4581 |
SDValue InMask) { |
4581 |
SDValue InMask) { |
| 4582 |
assert(Subtarget->hasAVX512() && "Expected AVX512!"); |
4582 |
assert(Subtarget->hasAVX512() && "Expected AVX512!"); |
| 4583 |
assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && |
4583 |
assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && |
| 4584 |
"Unexpected VT!"); |
4584 |
"Unexpected VT!"); |
| 4585 |
|
4585 |
|
| 4586 |
// Look for equal and not equal compares. |
4586 |
// Look for equal and not equal compares. |
| 4587 |
ISD::CondCode CC = cast(Setcc.getOperand(2))->get(); |
4587 |
ISD::CondCode CC = cast(Setcc.getOperand(2))->get(); |
| 4588 |
if (CC != ISD::SETEQ && CC != ISD::SETNE) |
4588 |
if (CC != ISD::SETEQ && CC != ISD::SETNE) |
| 4589 |
return false; |
4589 |
return false; |
| 4590 |
|
4590 |
|
| 4591 |
SDValue SetccOp0 = Setcc.getOperand(0); |
4591 |
SDValue SetccOp0 = Setcc.getOperand(0); |
| 4592 |
SDValue SetccOp1 = Setcc.getOperand(1); |
4592 |
SDValue SetccOp1 = Setcc.getOperand(1); |
| 4593 |
|
4593 |
|
| 4594 |
// Canonicalize the all zero vector to the RHS. |
4594 |
// Canonicalize the all zero vector to the RHS. |
| 4595 |
if (ISD::isBuildVectorAllZeros(SetccOp0.getNode())) |
4595 |
if (ISD::isBuildVectorAllZeros(SetccOp0.getNode())) |
| 4596 |
std::swap(SetccOp0, SetccOp1); |
4596 |
std::swap(SetccOp0, SetccOp1); |
| 4597 |
|
4597 |
|
| 4598 |
// See if we're comparing against zero. |
4598 |
// See if we're comparing against zero. |
| 4599 |
if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode())) |
4599 |
if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode())) |
| 4600 |
return false; |
4600 |
return false; |
| 4601 |
|
4601 |
|
| 4602 |
SDValue N0 = SetccOp0; |
4602 |
SDValue N0 = SetccOp0; |
| 4603 |
|
4603 |
|
| 4604 |
MVT CmpVT = N0.getSimpleValueType(); |
4604 |
MVT CmpVT = N0.getSimpleValueType(); |
| 4605 |
MVT CmpSVT = CmpVT.getVectorElementType(); |
4605 |
MVT CmpSVT = CmpVT.getVectorElementType(); |
| 4606 |
|
4606 |
|
| 4607 |
// Start with both operands the same. We'll try to refine this. |
4607 |
// Start with both operands the same. We'll try to refine this. |
| 4608 |
SDValue Src0 = N0; |
4608 |
SDValue Src0 = N0; |
| 4609 |
SDValue Src1 = N0; |
4609 |
SDValue Src1 = N0; |
| 4610 |
|
4610 |
|
| 4611 |
{ |
4611 |
{ |
| 4612 |
// Look through single use bitcasts. |
4612 |
// Look through single use bitcasts. |
| 4613 |
SDValue N0Temp = N0; |
4613 |
SDValue N0Temp = N0; |
| 4614 |
if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse()) |
4614 |
if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse()) |
| 4615 |
N0Temp = N0.getOperand(0); |
4615 |
N0Temp = N0.getOperand(0); |
| 4616 |
|
4616 |
|
| 4617 |
// Look for single use AND. |
4617 |
// Look for single use AND. |
| 4618 |
if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) { |
4618 |
if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) { |
| 4619 |
Src0 = N0Temp.getOperand(0); |
4619 |
Src0 = N0Temp.getOperand(0); |
| 4620 |
Src1 = N0Temp.getOperand(1); |
4620 |
Src1 = N0Temp.getOperand(1); |
| 4621 |
} |
4621 |
} |
| 4622 |
} |
4622 |
} |
| 4623 |
|
4623 |
|
| 4624 |
// Without VLX we need to widen the operation. |
4624 |
// Without VLX we need to widen the operation. |
| 4625 |
bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector(); |
4625 |
bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector(); |
| 4626 |
|
4626 |
|
| 4627 |
auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L, |
4627 |
auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L, |
| 4628 |
SDValue &Base, SDValue &Scale, SDValue &Index, |
4628 |
SDValue &Base, SDValue &Scale, SDValue &Index, |
| 4629 |
SDValue &Disp, SDValue &Segment) { |
4629 |
SDValue &Disp, SDValue &Segment) { |
| 4630 |
// If we need to widen, we can't fold the load. |
4630 |
// If we need to widen, we can't fold the load. |
| 4631 |
if (!Widen) |
4631 |
if (!Widen) |
| 4632 |
if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment)) |
4632 |
if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment)) |
| 4633 |
return true; |
4633 |
return true; |
| 4634 |
|
4634 |
|
| 4635 |
// If we didn't fold a load, try to match broadcast. No widening limitation |
4635 |
// If we didn't fold a load, try to match broadcast. No widening limitation |
| 4636 |
// for this. But only 32 and 64 bit types are supported. |
4636 |
// for this. But only 32 and 64 bit types are supported. |
| 4637 |
if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64) |
4637 |
if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64) |
| 4638 |
return false; |
4638 |
return false; |
| 4639 |
|
4639 |
|
| 4640 |
// Look through single use bitcasts. |
4640 |
// Look through single use bitcasts. |
| 4641 |
if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) { |
4641 |
if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) { |
| 4642 |
P = L.getNode(); |
4642 |
P = L.getNode(); |
| 4643 |
L = L.getOperand(0); |
4643 |
L = L.getOperand(0); |
| 4644 |
} |
4644 |
} |
| 4645 |
|
4645 |
|
| 4646 |
if (L.getOpcode() != X86ISD::VBROADCAST_LOAD) |
4646 |
if (L.getOpcode() != X86ISD::VBROADCAST_LOAD) |
| 4647 |
return false; |
4647 |
return false; |
| 4648 |
|
4648 |
|
| 4649 |
auto *MemIntr = cast(L); |
4649 |
auto *MemIntr = cast(L); |
| 4650 |
if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits()) |
4650 |
if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits()) |
| 4651 |
return false; |
4651 |
return false; |
| 4652 |
|
4652 |
|
| 4653 |
return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment); |
4653 |
return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment); |
| 4654 |
}; |
4654 |
}; |
| 4655 |
|
4655 |
|
| 4656 |
// We can only fold loads if the sources are unique. |
4656 |
// We can only fold loads if the sources are unique. |
| 4657 |
bool CanFoldLoads = Src0 != Src1; |
4657 |
bool CanFoldLoads = Src0 != Src1; |
| 4658 |
|
4658 |
|
| 4659 |
bool FoldedLoad = false; |
4659 |
bool FoldedLoad = false; |
| 4660 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
4660 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 4661 |
if (CanFoldLoads) { |
4661 |
if (CanFoldLoads) { |
| 4662 |
FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2, |
4662 |
FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2, |
| 4663 |
Tmp3, Tmp4); |
4663 |
Tmp3, Tmp4); |
| 4664 |
if (!FoldedLoad) { |
4664 |
if (!FoldedLoad) { |
| 4665 |
// And is commutative. |
4665 |
// And is commutative. |
| 4666 |
FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1, |
4666 |
FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1, |
| 4667 |
Tmp2, Tmp3, Tmp4); |
4667 |
Tmp2, Tmp3, Tmp4); |
| 4668 |
if (FoldedLoad) |
4668 |
if (FoldedLoad) |
| 4669 |
std::swap(Src0, Src1); |
4669 |
std::swap(Src0, Src1); |
| 4670 |
} |
4670 |
} |
| 4671 |
} |
4671 |
} |
| 4672 |
|
4672 |
|
| 4673 |
bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD; |
4673 |
bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD; |
| 4674 |
|
4674 |
|
| 4675 |
bool IsMasked = InMask.getNode() != nullptr; |
4675 |
bool IsMasked = InMask.getNode() != nullptr; |
| 4676 |
|
4676 |
|
| 4677 |
SDLoc dl(Root); |
4677 |
SDLoc dl(Root); |
| 4678 |
|
4678 |
|
| 4679 |
MVT ResVT = Setcc.getSimpleValueType(); |
4679 |
MVT ResVT = Setcc.getSimpleValueType(); |
| 4680 |
MVT MaskVT = ResVT; |
4680 |
MVT MaskVT = ResVT; |
| 4681 |
if (Widen) { |
4681 |
if (Widen) { |
| 4682 |
// Widen the inputs using insert_subreg or copy_to_regclass. |
4682 |
// Widen the inputs using insert_subreg or copy_to_regclass. |
| 4683 |
unsigned Scale = CmpVT.is128BitVector() ? 4 : 2; |
4683 |
unsigned Scale = CmpVT.is128BitVector() ? 4 : 2; |
| 4684 |
unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm; |
4684 |
unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm; |
| 4685 |
unsigned NumElts = CmpVT.getVectorNumElements() * Scale; |
4685 |
unsigned NumElts = CmpVT.getVectorNumElements() * Scale; |
| 4686 |
CmpVT = MVT::getVectorVT(CmpSVT, NumElts); |
4686 |
CmpVT = MVT::getVectorVT(CmpSVT, NumElts); |
| 4687 |
MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
4687 |
MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
| 4688 |
SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl, |
4688 |
SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl, |
| 4689 |
CmpVT), 0); |
4689 |
CmpVT), 0); |
| 4690 |
Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0); |
4690 |
Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0); |
| 4691 |
|
4691 |
|
| 4692 |
if (!FoldedBCast) |
4692 |
if (!FoldedBCast) |
| 4693 |
Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1); |
4693 |
Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1); |
| 4694 |
|
4694 |
|
| 4695 |
if (IsMasked) { |
4695 |
if (IsMasked) { |
| 4696 |
// Widen the mask. |
4696 |
// Widen the mask. |
| 4697 |
unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID(); |
4697 |
unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID(); |
| 4698 |
SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); |
4698 |
SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); |
| 4699 |
InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, |
4699 |
InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, |
| 4700 |
dl, MaskVT, InMask, RC), 0); |
4700 |
dl, MaskVT, InMask, RC), 0); |
| 4701 |
} |
4701 |
} |
| 4702 |
} |
4702 |
} |
| 4703 |
|
4703 |
|
| 4704 |
bool IsTestN = CC == ISD::SETEQ; |
4704 |
bool IsTestN = CC == ISD::SETEQ; |
| 4705 |
unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast, |
4705 |
unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast, |
| 4706 |
IsMasked); |
4706 |
IsMasked); |
| 4707 |
|
4707 |
|
| 4708 |
MachineSDNode *CNode; |
4708 |
MachineSDNode *CNode; |
| 4709 |
if (FoldedLoad) { |
4709 |
if (FoldedLoad) { |
| 4710 |
SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other); |
4710 |
SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other); |
| 4711 |
|
4711 |
|
| 4712 |
if (IsMasked) { |
4712 |
if (IsMasked) { |
| 4713 |
SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, |
4713 |
SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, |
| 4714 |
Src1.getOperand(0) }; |
4714 |
Src1.getOperand(0) }; |
| 4715 |
CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
4715 |
CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
| 4716 |
} else { |
4716 |
} else { |
| 4717 |
SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, |
4717 |
SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, |
| 4718 |
Src1.getOperand(0) }; |
4718 |
Src1.getOperand(0) }; |
| 4719 |
CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
4719 |
CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
| 4720 |
} |
4720 |
} |
| 4721 |
|
4721 |
|
| 4722 |
// Update the chain. |
4722 |
// Update the chain. |
| 4723 |
ReplaceUses(Src1.getValue(1), SDValue(CNode, 1)); |
4723 |
ReplaceUses(Src1.getValue(1), SDValue(CNode, 1)); |
| 4724 |
// Record the mem-refs |
4724 |
// Record the mem-refs |
| 4725 |
CurDAG->setNodeMemRefs(CNode, {cast(Src1)->getMemOperand()}); |
4725 |
CurDAG->setNodeMemRefs(CNode, {cast(Src1)->getMemOperand()}); |
| 4726 |
} else { |
4726 |
} else { |
| 4727 |
if (IsMasked) |
4727 |
if (IsMasked) |
| 4728 |
CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1); |
4728 |
CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1); |
| 4729 |
else |
4729 |
else |
| 4730 |
CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1); |
4730 |
CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1); |
| 4731 |
} |
4731 |
} |
| 4732 |
|
4732 |
|
| 4733 |
// If we widened, we need to shrink the mask VT. |
4733 |
// If we widened, we need to shrink the mask VT. |
| 4734 |
if (Widen) { |
4734 |
if (Widen) { |
| 4735 |
unsigned RegClass = TLI->getRegClassFor(ResVT)->getID(); |
4735 |
unsigned RegClass = TLI->getRegClassFor(ResVT)->getID(); |
| 4736 |
SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); |
4736 |
SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); |
| 4737 |
CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, |
4737 |
CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, |
| 4738 |
dl, ResVT, SDValue(CNode, 0), RC); |
4738 |
dl, ResVT, SDValue(CNode, 0), RC); |
| 4739 |
} |
4739 |
} |
| 4740 |
|
4740 |
|
| 4741 |
ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0)); |
4741 |
ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0)); |
| 4742 |
CurDAG->RemoveDeadNode(Root); |
4742 |
CurDAG->RemoveDeadNode(Root); |
| 4743 |
return true; |
4743 |
return true; |
| 4744 |
} |
4744 |
} |
| 4745 |
|
4745 |
|
| 4746 |
// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it |
4746 |
// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it |
| 4747 |
// into vpternlog. |
4747 |
// into vpternlog. |
| 4748 |
bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { |
4748 |
bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { |
| 4749 |
assert(N->getOpcode() == ISD::OR && "Unexpected opcode!"); |
4749 |
assert(N->getOpcode() == ISD::OR && "Unexpected opcode!"); |
| 4750 |
|
4750 |
|
| 4751 |
MVT NVT = N->getSimpleValueType(0); |
4751 |
MVT NVT = N->getSimpleValueType(0); |
| 4752 |
|
4752 |
|
| 4753 |
// Make sure we support VPTERNLOG. |
4753 |
// Make sure we support VPTERNLOG. |
| 4754 |
if (!NVT.isVector() || !Subtarget->hasAVX512()) |
4754 |
if (!NVT.isVector() || !Subtarget->hasAVX512()) |
| 4755 |
return false; |
4755 |
return false; |
| 4756 |
|
4756 |
|
| 4757 |
// We need VLX for 128/256-bit. |
4757 |
// We need VLX for 128/256-bit. |
| 4758 |
if (!(Subtarget->hasVLX() || NVT.is512BitVector())) |
4758 |
if (!(Subtarget->hasVLX() || NVT.is512BitVector())) |
| 4759 |
return false; |
4759 |
return false; |
| 4760 |
|
4760 |
|
| 4761 |
SDValue N0 = N->getOperand(0); |
4761 |
SDValue N0 = N->getOperand(0); |
| 4762 |
SDValue N1 = N->getOperand(1); |
4762 |
SDValue N1 = N->getOperand(1); |
| 4763 |
|
4763 |
|
| 4764 |
// Canonicalize AND to LHS. |
4764 |
// Canonicalize AND to LHS. |
| 4765 |
if (N1.getOpcode() == ISD::AND) |
4765 |
if (N1.getOpcode() == ISD::AND) |
| 4766 |
std::swap(N0, N1); |
4766 |
std::swap(N0, N1); |
| 4767 |
|
4767 |
|
| 4768 |
if (N0.getOpcode() != ISD::AND || |
4768 |
if (N0.getOpcode() != ISD::AND || |
| 4769 |
N1.getOpcode() != X86ISD::ANDNP || |
4769 |
N1.getOpcode() != X86ISD::ANDNP || |
| 4770 |
!N0.hasOneUse() || !N1.hasOneUse()) |
4770 |
!N0.hasOneUse() || !N1.hasOneUse()) |
| 4771 |
return false; |
4771 |
return false; |
| 4772 |
|
4772 |
|
| 4773 |
// ANDN is not commutable, use it to pick down A and C. |
4773 |
// ANDN is not commutable, use it to pick down A and C. |
| 4774 |
SDValue A = N1.getOperand(0); |
4774 |
SDValue A = N1.getOperand(0); |
| 4775 |
SDValue C = N1.getOperand(1); |
4775 |
SDValue C = N1.getOperand(1); |
| 4776 |
|
4776 |
|
| 4777 |
// AND is commutable, if one operand matches A, the other operand is B. |
4777 |
// AND is commutable, if one operand matches A, the other operand is B. |
| 4778 |
// Otherwise this isn't a match. |
4778 |
// Otherwise this isn't a match. |
| 4779 |
SDValue B; |
4779 |
SDValue B; |
| 4780 |
if (N0.getOperand(0) == A) |
4780 |
if (N0.getOperand(0) == A) |
| 4781 |
B = N0.getOperand(1); |
4781 |
B = N0.getOperand(1); |
| 4782 |
else if (N0.getOperand(1) == A) |
4782 |
else if (N0.getOperand(1) == A) |
| 4783 |
B = N0.getOperand(0); |
4783 |
B = N0.getOperand(0); |
| 4784 |
else |
4784 |
else |
| 4785 |
return false; |
4785 |
return false; |
| 4786 |
|
4786 |
|
| 4787 |
SDLoc dl(N); |
4787 |
SDLoc dl(N); |
| 4788 |
SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8); |
4788 |
SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8); |
| 4789 |
SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm); |
4789 |
SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm); |
| 4790 |
ReplaceNode(N, Ternlog.getNode()); |
4790 |
ReplaceNode(N, Ternlog.getNode()); |
| 4791 |
|
4791 |
|
| 4792 |
return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(), |
4792 |
return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(), |
| 4793 |
Ternlog.getNode(), A, B, C, 0xCA); |
4793 |
Ternlog.getNode(), A, B, C, 0xCA); |
| 4794 |
} |
4794 |
} |
| 4795 |
|
4795 |
|
| 4796 |
void X86DAGToDAGISel::Select(SDNode *Node) { |
4796 |
void X86DAGToDAGISel::Select(SDNode *Node) { |
| 4797 |
MVT NVT = Node->getSimpleValueType(0); |
4797 |
MVT NVT = Node->getSimpleValueType(0); |
| 4798 |
unsigned Opcode = Node->getOpcode(); |
4798 |
unsigned Opcode = Node->getOpcode(); |
| 4799 |
SDLoc dl(Node); |
4799 |
SDLoc dl(Node); |
| 4800 |
|
4800 |
|
| 4801 |
if (Node->isMachineOpcode()) { |
4801 |
if (Node->isMachineOpcode()) { |
| 4802 |
LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); |
4802 |
LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); |
| 4803 |
Node->setNodeId(-1); |
4803 |
Node->setNodeId(-1); |
| 4804 |
return; // Already selected. |
4804 |
return; // Already selected. |
| 4805 |
} |
4805 |
} |
| 4806 |
|
4806 |
|
| 4807 |
switch (Opcode) { |
4807 |
switch (Opcode) { |
| 4808 |
default: break; |
4808 |
default: break; |
| 4809 |
case ISD::INTRINSIC_W_CHAIN: { |
4809 |
case ISD::INTRINSIC_W_CHAIN: { |
| 4810 |
unsigned IntNo = Node->getConstantOperandVal(1); |
4810 |
unsigned IntNo = Node->getConstantOperandVal(1); |
| 4811 |
switch (IntNo) { |
4811 |
switch (IntNo) { |
| 4812 |
default: break; |
4812 |
default: break; |
| 4813 |
case Intrinsic::x86_encodekey128: |
4813 |
case Intrinsic::x86_encodekey128: |
| 4814 |
case Intrinsic::x86_encodekey256: { |
4814 |
case Intrinsic::x86_encodekey256: { |
| 4815 |
if (!Subtarget->hasKL()) |
4815 |
if (!Subtarget->hasKL()) |
| 4816 |
break; |
4816 |
break; |
| 4817 |
|
4817 |
|
| 4818 |
unsigned Opcode; |
4818 |
unsigned Opcode; |
| 4819 |
switch (IntNo) { |
4819 |
switch (IntNo) { |
| 4820 |
default: llvm_unreachable("Impossible intrinsic"); |
4820 |
default: llvm_unreachable("Impossible intrinsic"); |
| 4821 |
case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128; break; |
4821 |
case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128; break; |
| 4822 |
case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256; break; |
4822 |
case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256; break; |
| 4823 |
} |
4823 |
} |
| 4824 |
|
4824 |
|
| 4825 |
SDValue Chain = Node->getOperand(0); |
4825 |
SDValue Chain = Node->getOperand(0); |
| 4826 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3), |
4826 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3), |
| 4827 |
SDValue()); |
4827 |
SDValue()); |
| 4828 |
if (Opcode == X86::ENCODEKEY256) |
4828 |
if (Opcode == X86::ENCODEKEY256) |
| 4829 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4), |
4829 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4), |
| 4830 |
Chain.getValue(1)); |
4830 |
Chain.getValue(1)); |
| 4831 |
|
4831 |
|
| 4832 |
MachineSDNode *Res = CurDAG->getMachineNode( |
4832 |
MachineSDNode *Res = CurDAG->getMachineNode( |
| 4833 |
Opcode, dl, Node->getVTList(), |
4833 |
Opcode, dl, Node->getVTList(), |
| 4834 |
{Node->getOperand(2), Chain, Chain.getValue(1)}); |
4834 |
{Node->getOperand(2), Chain, Chain.getValue(1)}); |
| 4835 |
ReplaceNode(Node, Res); |
4835 |
ReplaceNode(Node, Res); |
| 4836 |
return; |
4836 |
return; |
| 4837 |
} |
4837 |
} |
| 4838 |
case Intrinsic::x86_tileloadd64_internal: |
4838 |
case Intrinsic::x86_tileloadd64_internal: |
| 4839 |
case Intrinsic::x86_tileloaddt164_internal: { |
4839 |
case Intrinsic::x86_tileloaddt164_internal: { |
| 4840 |
if (!Subtarget->hasAMXTILE()) |
4840 |
if (!Subtarget->hasAMXTILE()) |
| 4841 |
break; |
4841 |
break; |
| 4842 |
unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal |
4842 |
unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal |
| 4843 |
? X86::PTILELOADDV |
4843 |
? X86::PTILELOADDV |
| 4844 |
: X86::PTILELOADDT1V; |
4844 |
: X86::PTILELOADDT1V; |
| 4845 |
// _tile_loadd_internal(row, col, buf, STRIDE) |
4845 |
// _tile_loadd_internal(row, col, buf, STRIDE) |
| 4846 |
SDValue Base = Node->getOperand(4); |
4846 |
SDValue Base = Node->getOperand(4); |
| 4847 |
SDValue Scale = getI8Imm(1, dl); |
4847 |
SDValue Scale = getI8Imm(1, dl); |
| 4848 |
SDValue Index = Node->getOperand(5); |
4848 |
SDValue Index = Node->getOperand(5); |
| 4849 |
SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); |
4849 |
SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); |
| 4850 |
SDValue Segment = CurDAG->getRegister(0, MVT::i16); |
4850 |
SDValue Segment = CurDAG->getRegister(0, MVT::i16); |
| 4851 |
SDValue Chain = Node->getOperand(0); |
4851 |
SDValue Chain = Node->getOperand(0); |
| 4852 |
MachineSDNode *CNode; |
4852 |
MachineSDNode *CNode; |
| 4853 |
SDValue Ops[] = {Node->getOperand(2), |
4853 |
SDValue Ops[] = {Node->getOperand(2), |
| 4854 |
Node->getOperand(3), |
4854 |
Node->getOperand(3), |
| 4855 |
Base, |
4855 |
Base, |
| 4856 |
Scale, |
4856 |
Scale, |
| 4857 |
Index, |
4857 |
Index, |
| 4858 |
Disp, |
4858 |
Disp, |
| 4859 |
Segment, |
4859 |
Segment, |
| 4860 |
Chain}; |
4860 |
Chain}; |
| 4861 |
CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops); |
4861 |
CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops); |
| 4862 |
ReplaceNode(Node, CNode); |
4862 |
ReplaceNode(Node, CNode); |
| 4863 |
return; |
4863 |
return; |
| 4864 |
} |
4864 |
} |
| 4865 |
} |
4865 |
} |
| 4866 |
break; |
4866 |
break; |
| 4867 |
} |
4867 |
} |
| 4868 |
case ISD::INTRINSIC_VOID: { |
4868 |
case ISD::INTRINSIC_VOID: { |
| 4869 |
unsigned IntNo = Node->getConstantOperandVal(1); |
4869 |
unsigned IntNo = Node->getConstantOperandVal(1); |
| 4870 |
switch (IntNo) { |
4870 |
switch (IntNo) { |
| 4871 |
default: break; |
4871 |
default: break; |
| 4872 |
case Intrinsic::x86_sse3_monitor: |
4872 |
case Intrinsic::x86_sse3_monitor: |
| 4873 |
case Intrinsic::x86_monitorx: |
4873 |
case Intrinsic::x86_monitorx: |
| 4874 |
case Intrinsic::x86_clzero: { |
4874 |
case Intrinsic::x86_clzero: { |
| 4875 |
bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64; |
4875 |
bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64; |
| 4876 |
|
4876 |
|
| 4877 |
unsigned Opc = 0; |
4877 |
unsigned Opc = 0; |
| 4878 |
switch (IntNo) { |
4878 |
switch (IntNo) { |
| 4879 |
default: llvm_unreachable("Unexpected intrinsic!"); |
4879 |
default: llvm_unreachable("Unexpected intrinsic!"); |
| 4880 |
case Intrinsic::x86_sse3_monitor: |
4880 |
case Intrinsic::x86_sse3_monitor: |
| 4881 |
if (!Subtarget->hasSSE3()) |
4881 |
if (!Subtarget->hasSSE3()) |
| 4882 |
break; |
4882 |
break; |
| 4883 |
Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr; |
4883 |
Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr; |
| 4884 |
break; |
4884 |
break; |
| 4885 |
case Intrinsic::x86_monitorx: |
4885 |
case Intrinsic::x86_monitorx: |
| 4886 |
if (!Subtarget->hasMWAITX()) |
4886 |
if (!Subtarget->hasMWAITX()) |
| 4887 |
break; |
4887 |
break; |
| 4888 |
Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr; |
4888 |
Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr; |
| 4889 |
break; |
4889 |
break; |
| 4890 |
case Intrinsic::x86_clzero: |
4890 |
case Intrinsic::x86_clzero: |
| 4891 |
if (!Subtarget->hasCLZERO()) |
4891 |
if (!Subtarget->hasCLZERO()) |
| 4892 |
break; |
4892 |
break; |
| 4893 |
Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r; |
4893 |
Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r; |
| 4894 |
break; |
4894 |
break; |
| 4895 |
} |
4895 |
} |
| 4896 |
|
4896 |
|
| 4897 |
if (Opc) { |
4897 |
if (Opc) { |
| 4898 |
unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX; |
4898 |
unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX; |
| 4899 |
SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg, |
4899 |
SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg, |
| 4900 |
Node->getOperand(2), SDValue()); |
4900 |
Node->getOperand(2), SDValue()); |
| 4901 |
SDValue InGlue = Chain.getValue(1); |
4901 |
SDValue InGlue = Chain.getValue(1); |
| 4902 |
|
4902 |
|
| 4903 |
if (IntNo == Intrinsic::x86_sse3_monitor || |
4903 |
if (IntNo == Intrinsic::x86_sse3_monitor || |
| 4904 |
IntNo == Intrinsic::x86_monitorx) { |
4904 |
IntNo == Intrinsic::x86_monitorx) { |
| 4905 |
// Copy the other two operands to ECX and EDX. |
4905 |
// Copy the other two operands to ECX and EDX. |
| 4906 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3), |
4906 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3), |
| 4907 |
InGlue); |
4907 |
InGlue); |
| 4908 |
InGlue = Chain.getValue(1); |
4908 |
InGlue = Chain.getValue(1); |
| 4909 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4), |
4909 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4), |
| 4910 |
InGlue); |
4910 |
InGlue); |
| 4911 |
InGlue = Chain.getValue(1); |
4911 |
InGlue = Chain.getValue(1); |
| 4912 |
} |
4912 |
} |
| 4913 |
|
4913 |
|
| 4914 |
MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, |
4914 |
MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, |
| 4915 |
{ Chain, InGlue}); |
4915 |
{ Chain, InGlue}); |
| 4916 |
ReplaceNode(Node, CNode); |
4916 |
ReplaceNode(Node, CNode); |
| 4917 |
return; |
4917 |
return; |
| 4918 |
} |
4918 |
} |
| 4919 |
|
4919 |
|
| 4920 |
break; |
4920 |
break; |
| 4921 |
} |
4921 |
} |
| 4922 |
case Intrinsic::x86_tilestored64_internal: { |
4922 |
case Intrinsic::x86_tilestored64_internal: { |
| 4923 |
unsigned Opc = X86::PTILESTOREDV; |
4923 |
unsigned Opc = X86::PTILESTOREDV; |
| 4924 |
// _tile_stored_internal(row, col, buf, STRIDE, c) |
4924 |
// _tile_stored_internal(row, col, buf, STRIDE, c) |
| 4925 |
SDValue Base = Node->getOperand(4); |
4925 |
SDValue Base = Node->getOperand(4); |
| 4926 |
SDValue Scale = getI8Imm(1, dl); |
4926 |
SDValue Scale = getI8Imm(1, dl); |
| 4927 |
SDValue Index = Node->getOperand(5); |
4927 |
SDValue Index = Node->getOperand(5); |
| 4928 |
SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); |
4928 |
SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); |
| 4929 |
SDValue Segment = CurDAG->getRegister(0, MVT::i16); |
4929 |
SDValue Segment = CurDAG->getRegister(0, MVT::i16); |
| 4930 |
SDValue Chain = Node->getOperand(0); |
4930 |
SDValue Chain = Node->getOperand(0); |
| 4931 |
MachineSDNode *CNode; |
4931 |
MachineSDNode *CNode; |
| 4932 |
SDValue Ops[] = {Node->getOperand(2), |
4932 |
SDValue Ops[] = {Node->getOperand(2), |
| 4933 |
Node->getOperand(3), |
4933 |
Node->getOperand(3), |
| 4934 |
Base, |
4934 |
Base, |
| 4935 |
Scale, |
4935 |
Scale, |
| 4936 |
Index, |
4936 |
Index, |
| 4937 |
Disp, |
4937 |
Disp, |
| 4938 |
Segment, |
4938 |
Segment, |
| 4939 |
Node->getOperand(6), |
4939 |
Node->getOperand(6), |
| 4940 |
Chain}; |
4940 |
Chain}; |
| 4941 |
CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
4941 |
CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
| 4942 |
ReplaceNode(Node, CNode); |
4942 |
ReplaceNode(Node, CNode); |
| 4943 |
return; |
4943 |
return; |
| 4944 |
} |
4944 |
} |
| 4945 |
case Intrinsic::x86_tileloadd64: |
4945 |
case Intrinsic::x86_tileloadd64: |
| 4946 |
case Intrinsic::x86_tileloaddt164: |
4946 |
case Intrinsic::x86_tileloaddt164: |
| 4947 |
case Intrinsic::x86_tilestored64: { |
4947 |
case Intrinsic::x86_tilestored64: { |
| 4948 |
if (!Subtarget->hasAMXTILE()) |
4948 |
if (!Subtarget->hasAMXTILE()) |
| 4949 |
break; |
4949 |
break; |
| 4950 |
unsigned Opc; |
4950 |
unsigned Opc; |
| 4951 |
switch (IntNo) { |
4951 |
switch (IntNo) { |
| 4952 |
default: llvm_unreachable("Unexpected intrinsic!"); |
4952 |
default: llvm_unreachable("Unexpected intrinsic!"); |
| 4953 |
case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break; |
4953 |
case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break; |
| 4954 |
case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break; |
4954 |
case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break; |
| 4955 |
case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break; |
4955 |
case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break; |
| 4956 |
} |
4956 |
} |
| 4957 |
// FIXME: Match displacement and scale. |
4957 |
// FIXME: Match displacement and scale. |
| 4958 |
unsigned TIndex = Node->getConstantOperandVal(2); |
4958 |
unsigned TIndex = Node->getConstantOperandVal(2); |
| 4959 |
SDValue TReg = getI8Imm(TIndex, dl); |
4959 |
SDValue TReg = getI8Imm(TIndex, dl); |
| 4960 |
SDValue Base = Node->getOperand(3); |
4960 |
SDValue Base = Node->getOperand(3); |
| 4961 |
SDValue Scale = getI8Imm(1, dl); |
4961 |
SDValue Scale = getI8Imm(1, dl); |
| 4962 |
SDValue Index = Node->getOperand(4); |
4962 |
SDValue Index = Node->getOperand(4); |
| 4963 |
SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); |
4963 |
SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); |
| 4964 |
SDValue Segment = CurDAG->getRegister(0, MVT::i16); |
4964 |
SDValue Segment = CurDAG->getRegister(0, MVT::i16); |
| 4965 |
SDValue Chain = Node->getOperand(0); |
4965 |
SDValue Chain = Node->getOperand(0); |
| 4966 |
MachineSDNode *CNode; |
4966 |
MachineSDNode *CNode; |
| 4967 |
if (Opc == X86::PTILESTORED) { |
4967 |
if (Opc == X86::PTILESTORED) { |
| 4968 |
SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain }; |
4968 |
SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain }; |
| 4969 |
CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
4969 |
CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
| 4970 |
} else { |
4970 |
} else { |
| 4971 |
SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain }; |
4971 |
SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain }; |
| 4972 |
CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
4972 |
CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
| 4973 |
} |
4973 |
} |
| 4974 |
ReplaceNode(Node, CNode); |
4974 |
ReplaceNode(Node, CNode); |
| 4975 |
return; |
4975 |
return; |
| 4976 |
} |
4976 |
} |
| 4977 |
} |
4977 |
} |
| 4978 |
break; |
4978 |
break; |
| 4979 |
} |
4979 |
} |
| 4980 |
case ISD::BRIND: |
4980 |
case ISD::BRIND: |
| 4981 |
case X86ISD::NT_BRIND: { |
4981 |
case X86ISD::NT_BRIND: { |
| 4982 |
if (Subtarget->isTargetNaCl()) |
4982 |
if (Subtarget->isTargetNaCl()) |
| 4983 |
// NaCl has its own pass where jmp %r32 are converted to jmp %r64. We |
4983 |
// NaCl has its own pass where jmp %r32 are converted to jmp %r64. We |
| 4984 |
// leave the instruction alone. |
4984 |
// leave the instruction alone. |
| 4985 |
break; |
4985 |
break; |
| 4986 |
if (Subtarget->isTarget64BitILP32()) { |
4986 |
if (Subtarget->isTarget64BitILP32()) { |
| 4987 |
// Converts a 32-bit register to a 64-bit, zero-extended version of |
4987 |
// Converts a 32-bit register to a 64-bit, zero-extended version of |
| 4988 |
// it. This is needed because x86-64 can do many things, but jmp %r32 |
4988 |
// it. This is needed because x86-64 can do many things, but jmp %r32 |
| 4989 |
// ain't one of them. |
4989 |
// ain't one of them. |
| 4990 |
SDValue Target = Node->getOperand(1); |
4990 |
SDValue Target = Node->getOperand(1); |
| 4991 |
assert(Target.getValueType() == MVT::i32 && "Unexpected VT!"); |
4991 |
assert(Target.getValueType() == MVT::i32 && "Unexpected VT!"); |
| 4992 |
SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64); |
4992 |
SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64); |
| 4993 |
SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other, |
4993 |
SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other, |
| 4994 |
Node->getOperand(0), ZextTarget); |
4994 |
Node->getOperand(0), ZextTarget); |
| 4995 |
ReplaceNode(Node, Brind.getNode()); |
4995 |
ReplaceNode(Node, Brind.getNode()); |
| 4996 |
SelectCode(ZextTarget.getNode()); |
4996 |
SelectCode(ZextTarget.getNode()); |
| 4997 |
SelectCode(Brind.getNode()); |
4997 |
SelectCode(Brind.getNode()); |
| 4998 |
return; |
4998 |
return; |
| 4999 |
} |
4999 |
} |
| 5000 |
break; |
5000 |
break; |
| 5001 |
} |
5001 |
} |
| 5002 |
case X86ISD::GlobalBaseReg: |
5002 |
case X86ISD::GlobalBaseReg: |
| 5003 |
ReplaceNode(Node, getGlobalBaseReg()); |
5003 |
ReplaceNode(Node, getGlobalBaseReg()); |
| 5004 |
return; |
5004 |
return; |
| 5005 |
|
5005 |
|
| 5006 |
case ISD::BITCAST: |
5006 |
case ISD::BITCAST: |
| 5007 |
// Just drop all 128/256/512-bit bitcasts. |
5007 |
// Just drop all 128/256/512-bit bitcasts. |
| 5008 |
if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() || |
5008 |
if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() || |
| 5009 |
NVT == MVT::f128) { |
5009 |
NVT == MVT::f128) { |
| 5010 |
ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); |
5010 |
ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); |
| 5011 |
CurDAG->RemoveDeadNode(Node); |
5011 |
CurDAG->RemoveDeadNode(Node); |
| 5012 |
return; |
5012 |
return; |
| 5013 |
} |
5013 |
} |
| 5014 |
break; |
5014 |
break; |
| 5015 |
|
5015 |
|
| 5016 |
case ISD::SRL: |
5016 |
case ISD::SRL: |
| 5017 |
if (matchBitExtract(Node)) |
5017 |
if (matchBitExtract(Node)) |
| 5018 |
return; |
5018 |
return; |
| 5019 |
[[fallthrough]]; |
5019 |
[[fallthrough]]; |
| 5020 |
case ISD::SRA: |
5020 |
case ISD::SRA: |
| 5021 |
case ISD::SHL: |
5021 |
case ISD::SHL: |
| 5022 |
if (tryShiftAmountMod(Node)) |
5022 |
if (tryShiftAmountMod(Node)) |
| 5023 |
return; |
5023 |
return; |
| 5024 |
break; |
5024 |
break; |
| 5025 |
|
5025 |
|
| 5026 |
case X86ISD::VPTERNLOG: { |
5026 |
case X86ISD::VPTERNLOG: { |
| 5027 |
uint8_t Imm = cast(Node->getOperand(3))->getZExtValue(); |
5027 |
uint8_t Imm = cast(Node->getOperand(3))->getZExtValue(); |
| 5028 |
if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0), |
5028 |
if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0), |
| 5029 |
Node->getOperand(1), Node->getOperand(2), Imm)) |
5029 |
Node->getOperand(1), Node->getOperand(2), Imm)) |
| 5030 |
return; |
5030 |
return; |
| 5031 |
break; |
5031 |
break; |
| 5032 |
} |
5032 |
} |
| 5033 |
|
5033 |
|
| 5034 |
case X86ISD::ANDNP: |
5034 |
case X86ISD::ANDNP: |
| 5035 |
if (tryVPTERNLOG(Node)) |
5035 |
if (tryVPTERNLOG(Node)) |
| 5036 |
return; |
5036 |
return; |
| 5037 |
break; |
5037 |
break; |
| 5038 |
|
5038 |
|
| 5039 |
case ISD::AND: |
5039 |
case ISD::AND: |
| 5040 |
if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) { |
5040 |
if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) { |
| 5041 |
// Try to form a masked VPTESTM. Operands can be in either order. |
5041 |
// Try to form a masked VPTESTM. Operands can be in either order. |
| 5042 |
SDValue N0 = Node->getOperand(0); |
5042 |
SDValue N0 = Node->getOperand(0); |
| 5043 |
SDValue N1 = Node->getOperand(1); |
5043 |
SDValue N1 = Node->getOperand(1); |
| 5044 |
if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() && |
5044 |
if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() && |
| 5045 |
tryVPTESTM(Node, N0, N1)) |
5045 |
tryVPTESTM(Node, N0, N1)) |
| 5046 |
return; |
5046 |
return; |
| 5047 |
if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && |
5047 |
if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && |
| 5048 |
tryVPTESTM(Node, N1, N0)) |
5048 |
tryVPTESTM(Node, N1, N0)) |
| 5049 |
return; |
5049 |
return; |
| 5050 |
} |
5050 |
} |
| 5051 |
|
5051 |
|
| 5052 |
if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) { |
5052 |
if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) { |
| 5053 |
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); |
5053 |
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); |
| 5054 |
CurDAG->RemoveDeadNode(Node); |
5054 |
CurDAG->RemoveDeadNode(Node); |
| 5055 |
return; |
5055 |
return; |
| 5056 |
} |
5056 |
} |
| 5057 |
if (matchBitExtract(Node)) |
5057 |
if (matchBitExtract(Node)) |
| 5058 |
return; |
5058 |
return; |
| 5059 |
if (AndImmShrink && shrinkAndImmediate(Node)) |
5059 |
if (AndImmShrink && shrinkAndImmediate(Node)) |
| 5060 |
return; |
5060 |
return; |
| 5061 |
|
5061 |
|
| 5062 |
[[fallthrough]]; |
5062 |
[[fallthrough]]; |
| 5063 |
case ISD::OR: |
5063 |
case ISD::OR: |
| 5064 |
case ISD::XOR: |
5064 |
case ISD::XOR: |
| 5065 |
if (tryShrinkShlLogicImm(Node)) |
5065 |
if (tryShrinkShlLogicImm(Node)) |
| 5066 |
return; |
5066 |
return; |
| 5067 |
if (Opcode == ISD::OR && tryMatchBitSelect(Node)) |
5067 |
if (Opcode == ISD::OR && tryMatchBitSelect(Node)) |
| 5068 |
return; |
5068 |
return; |
| 5069 |
if (tryVPTERNLOG(Node)) |
5069 |
if (tryVPTERNLOG(Node)) |
| 5070 |
return; |
5070 |
return; |
| 5071 |
|
5071 |
|
| 5072 |
[[fallthrough]]; |
5072 |
[[fallthrough]]; |
| 5073 |
case ISD::ADD: |
5073 |
case ISD::ADD: |
| 5074 |
if (Opcode == ISD::ADD && matchBitExtract(Node)) |
5074 |
if (Opcode == ISD::ADD && matchBitExtract(Node)) |
| 5075 |
return; |
5075 |
return; |
| 5076 |
[[fallthrough]]; |
5076 |
[[fallthrough]]; |
| 5077 |
case ISD::SUB: { |
5077 |
case ISD::SUB: { |
| 5078 |
// Try to avoid folding immediates with multiple uses for optsize. |
5078 |
// Try to avoid folding immediates with multiple uses for optsize. |
| 5079 |
// This code tries to select to register form directly to avoid going |
5079 |
// This code tries to select to register form directly to avoid going |
| 5080 |
// through the isel table which might fold the immediate. We can't change |
5080 |
// through the isel table which might fold the immediate. We can't change |
| 5081 |
// the patterns on the add/sub/and/or/xor with immediate paterns in the |
5081 |
// the patterns on the add/sub/and/or/xor with immediate paterns in the |
| 5082 |
// tablegen files to check immediate use count without making the patterns |
5082 |
// tablegen files to check immediate use count without making the patterns |
| 5083 |
// unavailable to the fast-isel table. |
5083 |
// unavailable to the fast-isel table. |
| 5084 |
if (!CurDAG->shouldOptForSize()) |
5084 |
if (!CurDAG->shouldOptForSize()) |
| 5085 |
break; |
5085 |
break; |
| 5086 |
|
5086 |
|
| 5087 |
// Only handle i8/i16/i32/i64. |
5087 |
// Only handle i8/i16/i32/i64. |
| 5088 |
if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64) |
5088 |
if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64) |
| 5089 |
break; |
5089 |
break; |
| 5090 |
|
5090 |
|
| 5091 |
SDValue N0 = Node->getOperand(0); |
5091 |
SDValue N0 = Node->getOperand(0); |
| 5092 |
SDValue N1 = Node->getOperand(1); |
5092 |
SDValue N1 = Node->getOperand(1); |
| 5093 |
|
5093 |
|
| 5094 |
auto *Cst = dyn_cast(N1); |
5094 |
auto *Cst = dyn_cast(N1); |
| 5095 |
if (!Cst) |
5095 |
if (!Cst) |
| 5096 |
break; |
5096 |
break; |
| 5097 |
|
5097 |
|
| 5098 |
int64_t Val = Cst->getSExtValue(); |
5098 |
int64_t Val = Cst->getSExtValue(); |
| 5099 |
|
5099 |
|
| 5100 |
// Make sure its an immediate that is considered foldable. |
5100 |
// Make sure its an immediate that is considered foldable. |
| 5101 |
// FIXME: Handle unsigned 32 bit immediates for 64-bit AND. |
5101 |
// FIXME: Handle unsigned 32 bit immediates for 64-bit AND. |
| 5102 |
if (!isInt<8>(Val) && !isInt<32>(Val)) |
5102 |
if (!isInt<8>(Val) && !isInt<32>(Val)) |
| 5103 |
break; |
5103 |
break; |
| 5104 |
|
5104 |
|
| 5105 |
// If this can match to INC/DEC, let it go. |
5105 |
// If this can match to INC/DEC, let it go. |
| 5106 |
if (Opcode == ISD::ADD && (Val == 1 || Val == -1)) |
5106 |
if (Opcode == ISD::ADD && (Val == 1 || Val == -1)) |
| 5107 |
break; |
5107 |
break; |
| 5108 |
|
5108 |
|
| 5109 |
// Check if we should avoid folding this immediate. |
5109 |
// Check if we should avoid folding this immediate. |
| 5110 |
if (!shouldAvoidImmediateInstFormsForSize(N1.getNode())) |
5110 |
if (!shouldAvoidImmediateInstFormsForSize(N1.getNode())) |
| 5111 |
break; |
5111 |
break; |
| 5112 |
|
5112 |
|
| 5113 |
// We should not fold the immediate. So we need a register form instead. |
5113 |
// We should not fold the immediate. So we need a register form instead. |
| 5114 |
unsigned ROpc, MOpc; |
5114 |
unsigned ROpc, MOpc; |
| 5115 |
switch (NVT.SimpleTy) { |
5115 |
switch (NVT.SimpleTy) { |
| 5116 |
default: llvm_unreachable("Unexpected VT!"); |
5116 |
default: llvm_unreachable("Unexpected VT!"); |
| 5117 |
case MVT::i8: |
5117 |
case MVT::i8: |
| 5118 |
switch (Opcode) { |
5118 |
switch (Opcode) { |
| 5119 |
default: llvm_unreachable("Unexpected opcode!"); |
5119 |
default: llvm_unreachable("Unexpected opcode!"); |
| 5120 |
case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break; |
5120 |
case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break; |
| 5121 |
case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break; |
5121 |
case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break; |
| 5122 |
case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break; |
5122 |
case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break; |
| 5123 |
case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break; |
5123 |
case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break; |
| 5124 |
case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break; |
5124 |
case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break; |
| 5125 |
} |
5125 |
} |
| 5126 |
break; |
5126 |
break; |
| 5127 |
case MVT::i16: |
5127 |
case MVT::i16: |
| 5128 |
switch (Opcode) { |
5128 |
switch (Opcode) { |
| 5129 |
default: llvm_unreachable("Unexpected opcode!"); |
5129 |
default: llvm_unreachable("Unexpected opcode!"); |
| 5130 |
case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break; |
5130 |
case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break; |
| 5131 |
case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break; |
5131 |
case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break; |
| 5132 |
case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break; |
5132 |
case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break; |
| 5133 |
case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break; |
5133 |
case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break; |
| 5134 |
case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break; |
5134 |
case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break; |
| 5135 |
} |
5135 |
} |
| 5136 |
break; |
5136 |
break; |
| 5137 |
case MVT::i32: |
5137 |
case MVT::i32: |
| 5138 |
switch (Opcode) { |
5138 |
switch (Opcode) { |
| 5139 |
default: llvm_unreachable("Unexpected opcode!"); |
5139 |
default: llvm_unreachable("Unexpected opcode!"); |
| 5140 |
case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break; |
5140 |
case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break; |
| 5141 |
case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break; |
5141 |
case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break; |
| 5142 |
case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break; |
5142 |
case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break; |
| 5143 |
case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break; |
5143 |
case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break; |
| 5144 |
case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break; |
5144 |
case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break; |
| 5145 |
} |
5145 |
} |
| 5146 |
break; |
5146 |
break; |
| 5147 |
case MVT::i64: |
5147 |
case MVT::i64: |
| 5148 |
switch (Opcode) { |
5148 |
switch (Opcode) { |
| 5149 |
default: llvm_unreachable("Unexpected opcode!"); |
5149 |
default: llvm_unreachable("Unexpected opcode!"); |
| 5150 |
case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break; |
5150 |
case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break; |
| 5151 |
case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break; |
5151 |
case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break; |
| 5152 |
case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break; |
5152 |
case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break; |
| 5153 |
case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break; |
5153 |
case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break; |
| 5154 |
case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break; |
5154 |
case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break; |
| 5155 |
} |
5155 |
} |
| 5156 |
break; |
5156 |
break; |
| 5157 |
} |
5157 |
} |
| 5158 |
|
5158 |
|
| 5159 |
// Ok this is a AND/OR/XOR/ADD/SUB with constant. |
5159 |
// Ok this is a AND/OR/XOR/ADD/SUB with constant. |
| 5160 |
|
5160 |
|
| 5161 |
// If this is a not a subtract, we can still try to fold a load. |
5161 |
// If this is a not a subtract, we can still try to fold a load. |
| 5162 |
if (Opcode != ISD::SUB) { |
5162 |
if (Opcode != ISD::SUB) { |
| 5163 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
5163 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 5164 |
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
5164 |
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
| 5165 |
SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; |
5165 |
SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; |
| 5166 |
SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); |
5166 |
SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); |
| 5167 |
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
5167 |
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
| 5168 |
// Update the chain. |
5168 |
// Update the chain. |
| 5169 |
ReplaceUses(N0.getValue(1), SDValue(CNode, 2)); |
5169 |
ReplaceUses(N0.getValue(1), SDValue(CNode, 2)); |
| 5170 |
// Record the mem-refs |
5170 |
// Record the mem-refs |
| 5171 |
CurDAG->setNodeMemRefs(CNode, {cast(N0)->getMemOperand()}); |
5171 |
CurDAG->setNodeMemRefs(CNode, {cast(N0)->getMemOperand()}); |
| 5172 |
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
5172 |
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
| 5173 |
CurDAG->RemoveDeadNode(Node); |
5173 |
CurDAG->RemoveDeadNode(Node); |
| 5174 |
return; |
5174 |
return; |
| 5175 |
} |
5175 |
} |
| 5176 |
} |
5176 |
} |
| 5177 |
|
5177 |
|
| 5178 |
CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1); |
5178 |
CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1); |
| 5179 |
return; |
5179 |
return; |
| 5180 |
} |
5180 |
} |
| 5181 |
|
5181 |
|
| 5182 |
case X86ISD::SMUL: |
5182 |
case X86ISD::SMUL: |
| 5183 |
// i16/i32/i64 are handled with isel patterns. |
5183 |
// i16/i32/i64 are handled with isel patterns. |
| 5184 |
if (NVT != MVT::i8) |
5184 |
if (NVT != MVT::i8) |
| 5185 |
break; |
5185 |
break; |
| 5186 |
[[fallthrough]]; |
5186 |
[[fallthrough]]; |
| 5187 |
case X86ISD::UMUL: { |
5187 |
case X86ISD::UMUL: { |
| 5188 |
SDValue N0 = Node->getOperand(0); |
5188 |
SDValue N0 = Node->getOperand(0); |
| 5189 |
SDValue N1 = Node->getOperand(1); |
5189 |
SDValue N1 = Node->getOperand(1); |
| 5190 |
|
5190 |
|
| 5191 |
unsigned LoReg, ROpc, MOpc; |
5191 |
unsigned LoReg, ROpc, MOpc; |
| 5192 |
switch (NVT.SimpleTy) { |
5192 |
switch (NVT.SimpleTy) { |
| 5193 |
default: llvm_unreachable("Unsupported VT!"); |
5193 |
default: llvm_unreachable("Unsupported VT!"); |
| 5194 |
case MVT::i8: |
5194 |
case MVT::i8: |
| 5195 |
LoReg = X86::AL; |
5195 |
LoReg = X86::AL; |
| 5196 |
ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r; |
5196 |
ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r; |
| 5197 |
MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m; |
5197 |
MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m; |
| 5198 |
break; |
5198 |
break; |
| 5199 |
case MVT::i16: |
5199 |
case MVT::i16: |
| 5200 |
LoReg = X86::AX; |
5200 |
LoReg = X86::AX; |
| 5201 |
ROpc = X86::MUL16r; |
5201 |
ROpc = X86::MUL16r; |
| 5202 |
MOpc = X86::MUL16m; |
5202 |
MOpc = X86::MUL16m; |
| 5203 |
break; |
5203 |
break; |
| 5204 |
case MVT::i32: |
5204 |
case MVT::i32: |
| 5205 |
LoReg = X86::EAX; |
5205 |
LoReg = X86::EAX; |
| 5206 |
ROpc = X86::MUL32r; |
5206 |
ROpc = X86::MUL32r; |
| 5207 |
MOpc = X86::MUL32m; |
5207 |
MOpc = X86::MUL32m; |
| 5208 |
break; |
5208 |
break; |
| 5209 |
case MVT::i64: |
5209 |
case MVT::i64: |
| 5210 |
LoReg = X86::RAX; |
5210 |
LoReg = X86::RAX; |
| 5211 |
ROpc = X86::MUL64r; |
5211 |
ROpc = X86::MUL64r; |
| 5212 |
MOpc = X86::MUL64m; |
5212 |
MOpc = X86::MUL64m; |
| 5213 |
break; |
5213 |
break; |
| 5214 |
} |
5214 |
} |
| 5215 |
|
5215 |
|
| 5216 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
5216 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 5217 |
bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
5217 |
bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
| 5218 |
// Multiply is commutative. |
5218 |
// Multiply is commutative. |
| 5219 |
if (!FoldedLoad) { |
5219 |
if (!FoldedLoad) { |
| 5220 |
FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
5220 |
FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
| 5221 |
if (FoldedLoad) |
5221 |
if (FoldedLoad) |
| 5222 |
std::swap(N0, N1); |
5222 |
std::swap(N0, N1); |
| 5223 |
} |
5223 |
} |
| 5224 |
|
5224 |
|
| 5225 |
SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, |
5225 |
SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, |
| 5226 |
N0, SDValue()).getValue(1); |
5226 |
N0, SDValue()).getValue(1); |
| 5227 |
|
5227 |
|
| 5228 |
MachineSDNode *CNode; |
5228 |
MachineSDNode *CNode; |
| 5229 |
if (FoldedLoad) { |
5229 |
if (FoldedLoad) { |
| 5230 |
// i16/i32/i64 use an instruction that produces a low and high result even |
5230 |
// i16/i32/i64 use an instruction that produces a low and high result even |
| 5231 |
// though only the low result is used. |
5231 |
// though only the low result is used. |
| 5232 |
SDVTList VTs; |
5232 |
SDVTList VTs; |
| 5233 |
if (NVT == MVT::i8) |
5233 |
if (NVT == MVT::i8) |
| 5234 |
VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); |
5234 |
VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); |
| 5235 |
else |
5235 |
else |
| 5236 |
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other); |
5236 |
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other); |
| 5237 |
|
5237 |
|
| 5238 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), |
5238 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), |
| 5239 |
InGlue }; |
5239 |
InGlue }; |
| 5240 |
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
5240 |
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
| 5241 |
|
5241 |
|
| 5242 |
// Update the chain. |
5242 |
// Update the chain. |
| 5243 |
ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3)); |
5243 |
ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3)); |
| 5244 |
// Record the mem-refs |
5244 |
// Record the mem-refs |
| 5245 |
CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); |
5245 |
CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); |
| 5246 |
} else { |
5246 |
} else { |
| 5247 |
// i16/i32/i64 use an instruction that produces a low and high result even |
5247 |
// i16/i32/i64 use an instruction that produces a low and high result even |
| 5248 |
// though only the low result is used. |
5248 |
// though only the low result is used. |
| 5249 |
SDVTList VTs; |
5249 |
SDVTList VTs; |
| 5250 |
if (NVT == MVT::i8) |
5250 |
if (NVT == MVT::i8) |
| 5251 |
VTs = CurDAG->getVTList(NVT, MVT::i32); |
5251 |
VTs = CurDAG->getVTList(NVT, MVT::i32); |
| 5252 |
else |
5252 |
else |
| 5253 |
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); |
5253 |
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); |
| 5254 |
|
5254 |
|
| 5255 |
CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InGlue}); |
5255 |
CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InGlue}); |
| 5256 |
} |
5256 |
} |
| 5257 |
|
5257 |
|
| 5258 |
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
5258 |
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
| 5259 |
ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2)); |
5259 |
ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2)); |
| 5260 |
CurDAG->RemoveDeadNode(Node); |
5260 |
CurDAG->RemoveDeadNode(Node); |
| 5261 |
return; |
5261 |
return; |
| 5262 |
} |
5262 |
} |
| 5263 |
|
5263 |
|
| 5264 |
case ISD::SMUL_LOHI: |
5264 |
case ISD::SMUL_LOHI: |
| 5265 |
case ISD::UMUL_LOHI: { |
5265 |
case ISD::UMUL_LOHI: { |
| 5266 |
SDValue N0 = Node->getOperand(0); |
5266 |
SDValue N0 = Node->getOperand(0); |
| 5267 |
SDValue N1 = Node->getOperand(1); |
5267 |
SDValue N1 = Node->getOperand(1); |
| 5268 |
|
5268 |
|
| 5269 |
unsigned Opc, MOpc; |
5269 |
unsigned Opc, MOpc; |
| 5270 |
unsigned LoReg, HiReg; |
5270 |
unsigned LoReg, HiReg; |
| 5271 |
bool IsSigned = Opcode == ISD::SMUL_LOHI; |
5271 |
bool IsSigned = Opcode == ISD::SMUL_LOHI; |
| 5272 |
bool UseMULX = !IsSigned && Subtarget->hasBMI2(); |
5272 |
bool UseMULX = !IsSigned && Subtarget->hasBMI2(); |
| 5273 |
bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty(); |
5273 |
bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty(); |
| 5274 |
switch (NVT.SimpleTy) { |
5274 |
switch (NVT.SimpleTy) { |
| 5275 |
default: llvm_unreachable("Unsupported VT!"); |
5275 |
default: llvm_unreachable("Unsupported VT!"); |
| 5276 |
case MVT::i32: |
5276 |
case MVT::i32: |
| 5277 |
Opc = UseMULXHi ? X86::MULX32Hrr : |
5277 |
Opc = UseMULXHi ? X86::MULX32Hrr : |
| 5278 |
UseMULX ? X86::MULX32rr : |
5278 |
UseMULX ? X86::MULX32rr : |
| 5279 |
IsSigned ? X86::IMUL32r : X86::MUL32r; |
5279 |
IsSigned ? X86::IMUL32r : X86::MUL32r; |
| 5280 |
MOpc = UseMULXHi ? X86::MULX32Hrm : |
5280 |
MOpc = UseMULXHi ? X86::MULX32Hrm : |
| 5281 |
UseMULX ? X86::MULX32rm : |
5281 |
UseMULX ? X86::MULX32rm : |
| 5282 |
IsSigned ? X86::IMUL32m : X86::MUL32m; |
5282 |
IsSigned ? X86::IMUL32m : X86::MUL32m; |
| 5283 |
LoReg = UseMULX ? X86::EDX : X86::EAX; |
5283 |
LoReg = UseMULX ? X86::EDX : X86::EAX; |
| 5284 |
HiReg = X86::EDX; |
5284 |
HiReg = X86::EDX; |
| 5285 |
break; |
5285 |
break; |
| 5286 |
case MVT::i64: |
5286 |
case MVT::i64: |
| 5287 |
Opc = UseMULXHi ? X86::MULX64Hrr : |
5287 |
Opc = UseMULXHi ? X86::MULX64Hrr : |
| 5288 |
UseMULX ? X86::MULX64rr : |
5288 |
UseMULX ? X86::MULX64rr : |
| 5289 |
IsSigned ? X86::IMUL64r : X86::MUL64r; |
5289 |
IsSigned ? X86::IMUL64r : X86::MUL64r; |
| 5290 |
MOpc = UseMULXHi ? X86::MULX64Hrm : |
5290 |
MOpc = UseMULXHi ? X86::MULX64Hrm : |
| 5291 |
UseMULX ? X86::MULX64rm : |
5291 |
UseMULX ? X86::MULX64rm : |
| 5292 |
IsSigned ? X86::IMUL64m : X86::MUL64m; |
5292 |
IsSigned ? X86::IMUL64m : X86::MUL64m; |
| 5293 |
LoReg = UseMULX ? X86::RDX : X86::RAX; |
5293 |
LoReg = UseMULX ? X86::RDX : X86::RAX; |
| 5294 |
HiReg = X86::RDX; |
5294 |
HiReg = X86::RDX; |
| 5295 |
break; |
5295 |
break; |
| 5296 |
} |
5296 |
} |
| 5297 |
|
5297 |
|
| 5298 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
5298 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 5299 |
bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
5299 |
bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
| 5300 |
// Multiply is commutative. |
5300 |
// Multiply is commutative. |
| 5301 |
if (!foldedLoad) { |
5301 |
if (!foldedLoad) { |
| 5302 |
foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
5302 |
foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
| 5303 |
if (foldedLoad) |
5303 |
if (foldedLoad) |
| 5304 |
std::swap(N0, N1); |
5304 |
std::swap(N0, N1); |
| 5305 |
} |
5305 |
} |
| 5306 |
|
5306 |
|
| 5307 |
SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, |
5307 |
SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, |
| 5308 |
N0, SDValue()).getValue(1); |
5308 |
N0, SDValue()).getValue(1); |
| 5309 |
SDValue ResHi, ResLo; |
5309 |
SDValue ResHi, ResLo; |
| 5310 |
if (foldedLoad) { |
5310 |
if (foldedLoad) { |
| 5311 |
SDValue Chain; |
5311 |
SDValue Chain; |
| 5312 |
MachineSDNode *CNode = nullptr; |
5312 |
MachineSDNode *CNode = nullptr; |
| 5313 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), |
5313 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), |
| 5314 |
InGlue }; |
5314 |
InGlue }; |
| 5315 |
if (UseMULXHi) { |
5315 |
if (UseMULXHi) { |
| 5316 |
SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other); |
5316 |
SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other); |
| 5317 |
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
5317 |
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
| 5318 |
ResHi = SDValue(CNode, 0); |
5318 |
ResHi = SDValue(CNode, 0); |
| 5319 |
Chain = SDValue(CNode, 1); |
5319 |
Chain = SDValue(CNode, 1); |
| 5320 |
} else if (UseMULX) { |
5320 |
} else if (UseMULX) { |
| 5321 |
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other); |
5321 |
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other); |
| 5322 |
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
5322 |
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
| 5323 |
ResHi = SDValue(CNode, 0); |
5323 |
ResHi = SDValue(CNode, 0); |
| 5324 |
ResLo = SDValue(CNode, 1); |
5324 |
ResLo = SDValue(CNode, 1); |
| 5325 |
Chain = SDValue(CNode, 2); |
5325 |
Chain = SDValue(CNode, 2); |
| 5326 |
} else { |
5326 |
} else { |
| 5327 |
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); |
5327 |
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); |
| 5328 |
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
5328 |
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
| 5329 |
Chain = SDValue(CNode, 0); |
5329 |
Chain = SDValue(CNode, 0); |
| 5330 |
InGlue = SDValue(CNode, 1); |
5330 |
InGlue = SDValue(CNode, 1); |
| 5331 |
} |
5331 |
} |
| 5332 |
|
5332 |
|
| 5333 |
// Update the chain. |
5333 |
// Update the chain. |
| 5334 |
ReplaceUses(N1.getValue(1), Chain); |
5334 |
ReplaceUses(N1.getValue(1), Chain); |
| 5335 |
// Record the mem-refs |
5335 |
// Record the mem-refs |
| 5336 |
CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); |
5336 |
CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); |
| 5337 |
} else { |
5337 |
} else { |
| 5338 |
SDValue Ops[] = { N1, InGlue }; |
5338 |
SDValue Ops[] = { N1, InGlue }; |
| 5339 |
if (UseMULXHi) { |
5339 |
if (UseMULXHi) { |
| 5340 |
SDVTList VTs = CurDAG->getVTList(NVT); |
5340 |
SDVTList VTs = CurDAG->getVTList(NVT); |
| 5341 |
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
5341 |
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
| 5342 |
ResHi = SDValue(CNode, 0); |
5342 |
ResHi = SDValue(CNode, 0); |
| 5343 |
} else if (UseMULX) { |
5343 |
} else if (UseMULX) { |
| 5344 |
SDVTList VTs = CurDAG->getVTList(NVT, NVT); |
5344 |
SDVTList VTs = CurDAG->getVTList(NVT, NVT); |
| 5345 |
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
5345 |
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
| 5346 |
ResHi = SDValue(CNode, 0); |
5346 |
ResHi = SDValue(CNode, 0); |
| 5347 |
ResLo = SDValue(CNode, 1); |
5347 |
ResLo = SDValue(CNode, 1); |
| 5348 |
} else { |
5348 |
} else { |
| 5349 |
SDVTList VTs = CurDAG->getVTList(MVT::Glue); |
5349 |
SDVTList VTs = CurDAG->getVTList(MVT::Glue); |
| 5350 |
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
5350 |
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
| 5351 |
InGlue = SDValue(CNode, 0); |
5351 |
InGlue = SDValue(CNode, 0); |
| 5352 |
} |
5352 |
} |
| 5353 |
} |
5353 |
} |
| 5354 |
|
5354 |
|
| 5355 |
// Copy the low half of the result, if it is needed. |
5355 |
// Copy the low half of the result, if it is needed. |
| 5356 |
if (!SDValue(Node, 0).use_empty()) { |
5356 |
if (!SDValue(Node, 0).use_empty()) { |
| 5357 |
if (!ResLo) { |
5357 |
if (!ResLo) { |
| 5358 |
assert(LoReg && "Register for low half is not defined!"); |
5358 |
assert(LoReg && "Register for low half is not defined!"); |
| 5359 |
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, |
5359 |
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, |
| 5360 |
NVT, InGlue); |
5360 |
NVT, InGlue); |
| 5361 |
InGlue = ResLo.getValue(2); |
5361 |
InGlue = ResLo.getValue(2); |
| 5362 |
} |
5362 |
} |
| 5363 |
ReplaceUses(SDValue(Node, 0), ResLo); |
5363 |
ReplaceUses(SDValue(Node, 0), ResLo); |
| 5364 |
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); |
5364 |
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); |
| 5365 |
dbgs() << '\n'); |
5365 |
dbgs() << '\n'); |
| 5366 |
} |
5366 |
} |
| 5367 |
// Copy the high half of the result, if it is needed. |
5367 |
// Copy the high half of the result, if it is needed. |
| 5368 |
if (!SDValue(Node, 1).use_empty()) { |
5368 |
if (!SDValue(Node, 1).use_empty()) { |
| 5369 |
if (!ResHi) { |
5369 |
if (!ResHi) { |
| 5370 |
assert(HiReg && "Register for high half is not defined!"); |
5370 |
assert(HiReg && "Register for high half is not defined!"); |
| 5371 |
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, |
5371 |
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, |
| 5372 |
NVT, InGlue); |
5372 |
NVT, InGlue); |
| 5373 |
InGlue = ResHi.getValue(2); |
5373 |
InGlue = ResHi.getValue(2); |
| 5374 |
} |
5374 |
} |
| 5375 |
ReplaceUses(SDValue(Node, 1), ResHi); |
5375 |
ReplaceUses(SDValue(Node, 1), ResHi); |
| 5376 |
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); |
5376 |
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); |
| 5377 |
dbgs() << '\n'); |
5377 |
dbgs() << '\n'); |
| 5378 |
} |
5378 |
} |
| 5379 |
|
5379 |
|
| 5380 |
CurDAG->RemoveDeadNode(Node); |
5380 |
CurDAG->RemoveDeadNode(Node); |
| 5381 |
return; |
5381 |
return; |
| 5382 |
} |
5382 |
} |
| 5383 |
|
5383 |
|
| 5384 |
case ISD::SDIVREM: |
5384 |
case ISD::SDIVREM: |
| 5385 |
case ISD::UDIVREM: { |
5385 |
case ISD::UDIVREM: { |
| 5386 |
SDValue N0 = Node->getOperand(0); |
5386 |
SDValue N0 = Node->getOperand(0); |
| 5387 |
SDValue N1 = Node->getOperand(1); |
5387 |
SDValue N1 = Node->getOperand(1); |
| 5388 |
|
5388 |
|
| 5389 |
unsigned ROpc, MOpc; |
5389 |
unsigned ROpc, MOpc; |
| 5390 |
bool isSigned = Opcode == ISD::SDIVREM; |
5390 |
bool isSigned = Opcode == ISD::SDIVREM; |
| 5391 |
if (!isSigned) { |
5391 |
if (!isSigned) { |
| 5392 |
switch (NVT.SimpleTy) { |
5392 |
switch (NVT.SimpleTy) { |
| 5393 |
default: llvm_unreachable("Unsupported VT!"); |
5393 |
default: llvm_unreachable("Unsupported VT!"); |
| 5394 |
case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break; |
5394 |
case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break; |
| 5395 |
case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break; |
5395 |
case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break; |
| 5396 |
case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break; |
5396 |
case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break; |
| 5397 |
case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break; |
5397 |
case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break; |
| 5398 |
} |
5398 |
} |
| 5399 |
} else { |
5399 |
} else { |
| 5400 |
switch (NVT.SimpleTy) { |
5400 |
switch (NVT.SimpleTy) { |
| 5401 |
default: llvm_unreachable("Unsupported VT!"); |
5401 |
default: llvm_unreachable("Unsupported VT!"); |
| 5402 |
case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break; |
5402 |
case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break; |
| 5403 |
case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break; |
5403 |
case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break; |
| 5404 |
case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break; |
5404 |
case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break; |
| 5405 |
case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break; |
5405 |
case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break; |
| 5406 |
} |
5406 |
} |
| 5407 |
} |
5407 |
} |
| 5408 |
|
5408 |
|
| 5409 |
unsigned LoReg, HiReg, ClrReg; |
5409 |
unsigned LoReg, HiReg, ClrReg; |
| 5410 |
unsigned SExtOpcode; |
5410 |
unsigned SExtOpcode; |
| 5411 |
switch (NVT.SimpleTy) { |
5411 |
switch (NVT.SimpleTy) { |
| 5412 |
default: llvm_unreachable("Unsupported VT!"); |
5412 |
default: llvm_unreachable("Unsupported VT!"); |
| 5413 |
case MVT::i8: |
5413 |
case MVT::i8: |
| 5414 |
LoReg = X86::AL; ClrReg = HiReg = X86::AH; |
5414 |
LoReg = X86::AL; ClrReg = HiReg = X86::AH; |
| 5415 |
SExtOpcode = 0; // Not used. |
5415 |
SExtOpcode = 0; // Not used. |
| 5416 |
break; |
5416 |
break; |
| 5417 |
case MVT::i16: |
5417 |
case MVT::i16: |
| 5418 |
LoReg = X86::AX; HiReg = X86::DX; |
5418 |
LoReg = X86::AX; HiReg = X86::DX; |
| 5419 |
ClrReg = X86::DX; |
5419 |
ClrReg = X86::DX; |
| 5420 |
SExtOpcode = X86::CWD; |
5420 |
SExtOpcode = X86::CWD; |
| 5421 |
break; |
5421 |
break; |
| 5422 |
case MVT::i32: |
5422 |
case MVT::i32: |
| 5423 |
LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; |
5423 |
LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; |
| 5424 |
SExtOpcode = X86::CDQ; |
5424 |
SExtOpcode = X86::CDQ; |
| 5425 |
break; |
5425 |
break; |
| 5426 |
case MVT::i64: |
5426 |
case MVT::i64: |
| 5427 |
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; |
5427 |
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; |
| 5428 |
SExtOpcode = X86::CQO; |
5428 |
SExtOpcode = X86::CQO; |
| 5429 |
break; |
5429 |
break; |
| 5430 |
} |
5430 |
} |
| 5431 |
|
5431 |
|
| 5432 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
5432 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 5433 |
bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
5433 |
bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
| 5434 |
bool signBitIsZero = CurDAG->SignBitIsZero(N0); |
5434 |
bool signBitIsZero = CurDAG->SignBitIsZero(N0); |
| 5435 |
|
5435 |
|
| 5436 |
SDValue InGlue; |
5436 |
SDValue InGlue; |
| 5437 |
if (NVT == MVT::i8) { |
5437 |
if (NVT == MVT::i8) { |
| 5438 |
// Special case for div8, just use a move with zero extension to AX to |
5438 |
// Special case for div8, just use a move with zero extension to AX to |
| 5439 |
// clear the upper 8 bits (AH). |
5439 |
// clear the upper 8 bits (AH). |
| 5440 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain; |
5440 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain; |
| 5441 |
MachineSDNode *Move; |
5441 |
MachineSDNode *Move; |
| 5442 |
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
5442 |
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
| 5443 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; |
5443 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; |
| 5444 |
unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8 |
5444 |
unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8 |
| 5445 |
: X86::MOVZX16rm8; |
5445 |
: X86::MOVZX16rm8; |
| 5446 |
Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops); |
5446 |
Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops); |
| 5447 |
Chain = SDValue(Move, 1); |
5447 |
Chain = SDValue(Move, 1); |
| 5448 |
ReplaceUses(N0.getValue(1), Chain); |
5448 |
ReplaceUses(N0.getValue(1), Chain); |
| 5449 |
// Record the mem-refs |
5449 |
// Record the mem-refs |
| 5450 |
CurDAG->setNodeMemRefs(Move, {cast(N0)->getMemOperand()}); |
5450 |
CurDAG->setNodeMemRefs(Move, {cast(N0)->getMemOperand()}); |
| 5451 |
} else { |
5451 |
} else { |
| 5452 |
unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8 |
5452 |
unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8 |
| 5453 |
: X86::MOVZX16rr8; |
5453 |
: X86::MOVZX16rr8; |
| 5454 |
Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0); |
5454 |
Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0); |
| 5455 |
Chain = CurDAG->getEntryNode(); |
5455 |
Chain = CurDAG->getEntryNode(); |
| 5456 |
} |
5456 |
} |
| 5457 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0), |
5457 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0), |
| 5458 |
SDValue()); |
5458 |
SDValue()); |
| 5459 |
InGlue = Chain.getValue(1); |
5459 |
InGlue = Chain.getValue(1); |
| 5460 |
} else { |
5460 |
} else { |
| 5461 |
InGlue = |
5461 |
InGlue = |
| 5462 |
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, |
5462 |
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, |
| 5463 |
LoReg, N0, SDValue()).getValue(1); |
5463 |
LoReg, N0, SDValue()).getValue(1); |
| 5464 |
if (isSigned && !signBitIsZero) { |
5464 |
if (isSigned && !signBitIsZero) { |
| 5465 |
// Sign extend the low part into the high part. |
5465 |
// Sign extend the low part into the high part. |
| 5466 |
InGlue = |
5466 |
InGlue = |
| 5467 |
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InGlue),0); |
5467 |
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InGlue),0); |
| 5468 |
} else { |
5468 |
} else { |
| 5469 |
// Zero out the high part, effectively zero extending the input. |
5469 |
// Zero out the high part, effectively zero extending the input. |
| 5470 |
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); |
5470 |
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); |
| 5471 |
SDValue ClrNode = SDValue( |
5471 |
SDValue ClrNode = SDValue( |
| 5472 |
CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0); |
5472 |
CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0); |
| 5473 |
switch (NVT.SimpleTy) { |
5473 |
switch (NVT.SimpleTy) { |
| 5474 |
case MVT::i16: |
5474 |
case MVT::i16: |
| 5475 |
ClrNode = |
5475 |
ClrNode = |
| 5476 |
SDValue(CurDAG->getMachineNode( |
5476 |
SDValue(CurDAG->getMachineNode( |
| 5477 |
TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, |
5477 |
TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, |
| 5478 |
CurDAG->getTargetConstant(X86::sub_16bit, dl, |
5478 |
CurDAG->getTargetConstant(X86::sub_16bit, dl, |
| 5479 |
MVT::i32)), |
5479 |
MVT::i32)), |
| 5480 |
0); |
5480 |
0); |
| 5481 |
break; |
5481 |
break; |
| 5482 |
case MVT::i32: |
5482 |
case MVT::i32: |
| 5483 |
break; |
5483 |
break; |
| 5484 |
case MVT::i64: |
5484 |
case MVT::i64: |
| 5485 |
ClrNode = |
5485 |
ClrNode = |
| 5486 |
SDValue(CurDAG->getMachineNode( |
5486 |
SDValue(CurDAG->getMachineNode( |
| 5487 |
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, |
5487 |
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, |
| 5488 |
CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode, |
5488 |
CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode, |
| 5489 |
CurDAG->getTargetConstant(X86::sub_32bit, dl, |
5489 |
CurDAG->getTargetConstant(X86::sub_32bit, dl, |
| 5490 |
MVT::i32)), |
5490 |
MVT::i32)), |
| 5491 |
0); |
5491 |
0); |
| 5492 |
break; |
5492 |
break; |
| 5493 |
default: |
5493 |
default: |
| 5494 |
llvm_unreachable("Unexpected division source"); |
5494 |
llvm_unreachable("Unexpected division source"); |
| 5495 |
} |
5495 |
} |
| 5496 |
|
5496 |
|
| 5497 |
InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, |
5497 |
InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, |
| 5498 |
ClrNode, InGlue).getValue(1); |
5498 |
ClrNode, InGlue).getValue(1); |
| 5499 |
} |
5499 |
} |
| 5500 |
} |
5500 |
} |
| 5501 |
|
5501 |
|
| 5502 |
if (foldedLoad) { |
5502 |
if (foldedLoad) { |
| 5503 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), |
5503 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), |
| 5504 |
InGlue }; |
5504 |
InGlue }; |
| 5505 |
MachineSDNode *CNode = |
5505 |
MachineSDNode *CNode = |
| 5506 |
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); |
5506 |
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); |
| 5507 |
InGlue = SDValue(CNode, 1); |
5507 |
InGlue = SDValue(CNode, 1); |
| 5508 |
// Update the chain. |
5508 |
// Update the chain. |
| 5509 |
ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); |
5509 |
ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); |
| 5510 |
// Record the mem-refs |
5510 |
// Record the mem-refs |
| 5511 |
CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); |
5511 |
CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); |
| 5512 |
} else { |
5512 |
} else { |
| 5513 |
InGlue = |
5513 |
InGlue = |
| 5514 |
SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InGlue), 0); |
5514 |
SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InGlue), 0); |
| 5515 |
} |
5515 |
} |
| 5516 |
|
5516 |
|
| 5517 |
// Prevent use of AH in a REX instruction by explicitly copying it to |
5517 |
// Prevent use of AH in a REX instruction by explicitly copying it to |
| 5518 |
// an ABCD_L register. |
5518 |
// an ABCD_L register. |
| 5519 |
// |
5519 |
// |
| 5520 |
// The current assumption of the register allocator is that isel |
5520 |
// The current assumption of the register allocator is that isel |
| 5521 |
// won't generate explicit references to the GR8_ABCD_H registers. If |
5521 |
// won't generate explicit references to the GR8_ABCD_H registers. If |
| 5522 |
// the allocator and/or the backend get enhanced to be more robust in |
5522 |
// the allocator and/or the backend get enhanced to be more robust in |
| 5523 |
// that regard, this can be, and should be, removed. |
5523 |
// that regard, this can be, and should be, removed. |
| 5524 |
if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) { |
5524 |
if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) { |
| 5525 |
SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8); |
5525 |
SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8); |
| 5526 |
unsigned AHExtOpcode = |
5526 |
unsigned AHExtOpcode = |
| 5527 |
isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX; |
5527 |
isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX; |
| 5528 |
|
5528 |
|
| 5529 |
SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32, |
5529 |
SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32, |
| 5530 |
MVT::Glue, AHCopy, InGlue); |
5530 |
MVT::Glue, AHCopy, InGlue); |
| 5531 |
SDValue Result(RNode, 0); |
5531 |
SDValue Result(RNode, 0); |
| 5532 |
InGlue = SDValue(RNode, 1); |
5532 |
InGlue = SDValue(RNode, 1); |
| 5533 |
|
5533 |
|
| 5534 |
Result = |
5534 |
Result = |
| 5535 |
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); |
5535 |
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); |
| 5536 |
|
5536 |
|
| 5537 |
ReplaceUses(SDValue(Node, 1), Result); |
5537 |
ReplaceUses(SDValue(Node, 1), Result); |
| 5538 |
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); |
5538 |
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); |
| 5539 |
dbgs() << '\n'); |
5539 |
dbgs() << '\n'); |
| 5540 |
} |
5540 |
} |
| 5541 |
// Copy the division (low) result, if it is needed. |
5541 |
// Copy the division (low) result, if it is needed. |
| 5542 |
if (!SDValue(Node, 0).use_empty()) { |
5542 |
if (!SDValue(Node, 0).use_empty()) { |
| 5543 |
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, |
5543 |
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, |
| 5544 |
LoReg, NVT, InGlue); |
5544 |
LoReg, NVT, InGlue); |
| 5545 |
InGlue = Result.getValue(2); |
5545 |
InGlue = Result.getValue(2); |
| 5546 |
ReplaceUses(SDValue(Node, 0), Result); |
5546 |
ReplaceUses(SDValue(Node, 0), Result); |
| 5547 |
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); |
5547 |
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); |
| 5548 |
dbgs() << '\n'); |
5548 |
dbgs() << '\n'); |
| 5549 |
} |
5549 |
} |
| 5550 |
// Copy the remainder (high) result, if it is needed. |
5550 |
// Copy the remainder (high) result, if it is needed. |
| 5551 |
if (!SDValue(Node, 1).use_empty()) { |
5551 |
if (!SDValue(Node, 1).use_empty()) { |
| 5552 |
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, |
5552 |
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, |
| 5553 |
HiReg, NVT, InGlue); |
5553 |
HiReg, NVT, InGlue); |
| 5554 |
InGlue = Result.getValue(2); |
5554 |
InGlue = Result.getValue(2); |
| 5555 |
ReplaceUses(SDValue(Node, 1), Result); |
5555 |
ReplaceUses(SDValue(Node, 1), Result); |
| 5556 |
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); |
5556 |
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); |
| 5557 |
dbgs() << '\n'); |
5557 |
dbgs() << '\n'); |
| 5558 |
} |
5558 |
} |
| 5559 |
CurDAG->RemoveDeadNode(Node); |
5559 |
CurDAG->RemoveDeadNode(Node); |
| 5560 |
return; |
5560 |
return; |
| 5561 |
} |
5561 |
} |
| 5562 |
|
5562 |
|
| 5563 |
case X86ISD::FCMP: |
5563 |
case X86ISD::FCMP: |
| 5564 |
case X86ISD::STRICT_FCMP: |
5564 |
case X86ISD::STRICT_FCMP: |
| 5565 |
case X86ISD::STRICT_FCMPS: { |
5565 |
case X86ISD::STRICT_FCMPS: { |
| 5566 |
bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP || |
5566 |
bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP || |
| 5567 |
Node->getOpcode() == X86ISD::STRICT_FCMPS; |
5567 |
Node->getOpcode() == X86ISD::STRICT_FCMPS; |
| 5568 |
SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0); |
5568 |
SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0); |
| 5569 |
SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1); |
5569 |
SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1); |
| 5570 |
|
5570 |
|
| 5571 |
// Save the original VT of the compare. |
5571 |
// Save the original VT of the compare. |
| 5572 |
MVT CmpVT = N0.getSimpleValueType(); |
5572 |
MVT CmpVT = N0.getSimpleValueType(); |
| 5573 |
|
5573 |
|
| 5574 |
// Floating point needs special handling if we don't have FCOMI. |
5574 |
// Floating point needs special handling if we don't have FCOMI. |
| 5575 |
if (Subtarget->canUseCMOV()) |
5575 |
if (Subtarget->canUseCMOV()) |
| 5576 |
break; |
5576 |
break; |
| 5577 |
|
5577 |
|
| 5578 |
bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS; |
5578 |
bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS; |
| 5579 |
|
5579 |
|
| 5580 |
unsigned Opc; |
5580 |
unsigned Opc; |
| 5581 |
switch (CmpVT.SimpleTy) { |
5581 |
switch (CmpVT.SimpleTy) { |
| 5582 |
default: llvm_unreachable("Unexpected type!"); |
5582 |
default: llvm_unreachable("Unexpected type!"); |
| 5583 |
case MVT::f32: |
5583 |
case MVT::f32: |
| 5584 |
Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32; |
5584 |
Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32; |
| 5585 |
break; |
5585 |
break; |
| 5586 |
case MVT::f64: |
5586 |
case MVT::f64: |
| 5587 |
Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64; |
5587 |
Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64; |
| 5588 |
break; |
5588 |
break; |
| 5589 |
case MVT::f80: |
5589 |
case MVT::f80: |
| 5590 |
Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80; |
5590 |
Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80; |
| 5591 |
break; |
5591 |
break; |
| 5592 |
} |
5592 |
} |
| 5593 |
|
5593 |
|
| 5594 |
SDValue Chain = |
5594 |
SDValue Chain = |
| 5595 |
IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode(); |
5595 |
IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode(); |
| 5596 |
SDValue Glue; |
5596 |
SDValue Glue; |
| 5597 |
if (IsStrictCmp) { |
5597 |
if (IsStrictCmp) { |
| 5598 |
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); |
5598 |
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); |
| 5599 |
Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0); |
5599 |
Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0); |
| 5600 |
Glue = Chain.getValue(1); |
5600 |
Glue = Chain.getValue(1); |
| 5601 |
} else { |
5601 |
} else { |
| 5602 |
Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0); |
5602 |
Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0); |
| 5603 |
} |
5603 |
} |
| 5604 |
|
5604 |
|
| 5605 |
// Move FPSW to AX. |
5605 |
// Move FPSW to AX. |
| 5606 |
SDValue FNSTSW = |
5606 |
SDValue FNSTSW = |
| 5607 |
SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0); |
5607 |
SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0); |
| 5608 |
|
5608 |
|
| 5609 |
// Extract upper 8-bits of AX. |
5609 |
// Extract upper 8-bits of AX. |
| 5610 |
SDValue Extract = |
5610 |
SDValue Extract = |
| 5611 |
CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW); |
5611 |
CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW); |
| 5612 |
|
5612 |
|
| 5613 |
// Move AH into flags. |
5613 |
// Move AH into flags. |
| 5614 |
// Some 64-bit targets lack SAHF support, but they do support FCOMI. |
5614 |
// Some 64-bit targets lack SAHF support, but they do support FCOMI. |
| 5615 |
assert(Subtarget->canUseLAHFSAHF() && |
5615 |
assert(Subtarget->canUseLAHFSAHF() && |
| 5616 |
"Target doesn't support SAHF or FCOMI?"); |
5616 |
"Target doesn't support SAHF or FCOMI?"); |
| 5617 |
SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue()); |
5617 |
SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue()); |
| 5618 |
Chain = AH; |
5618 |
Chain = AH; |
| 5619 |
SDValue SAHF = SDValue( |
5619 |
SDValue SAHF = SDValue( |
| 5620 |
CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0); |
5620 |
CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0); |
| 5621 |
|
5621 |
|
| 5622 |
if (IsStrictCmp) |
5622 |
if (IsStrictCmp) |
| 5623 |
ReplaceUses(SDValue(Node, 1), Chain); |
5623 |
ReplaceUses(SDValue(Node, 1), Chain); |
| 5624 |
|
5624 |
|
| 5625 |
ReplaceUses(SDValue(Node, 0), SAHF); |
5625 |
ReplaceUses(SDValue(Node, 0), SAHF); |
| 5626 |
CurDAG->RemoveDeadNode(Node); |
5626 |
CurDAG->RemoveDeadNode(Node); |
| 5627 |
return; |
5627 |
return; |
| 5628 |
} |
5628 |
} |
| 5629 |
|
5629 |
|
| 5630 |
case X86ISD::CMP: { |
5630 |
case X86ISD::CMP: { |
| 5631 |
SDValue N0 = Node->getOperand(0); |
5631 |
SDValue N0 = Node->getOperand(0); |
| 5632 |
SDValue N1 = Node->getOperand(1); |
5632 |
SDValue N1 = Node->getOperand(1); |
| 5633 |
|
5633 |
|
| 5634 |
// Optimizations for TEST compares. |
5634 |
// Optimizations for TEST compares. |
| 5635 |
if (!isNullConstant(N1)) |
5635 |
if (!isNullConstant(N1)) |
| 5636 |
break; |
5636 |
break; |
| 5637 |
|
5637 |
|
| 5638 |
// Save the original VT of the compare. |
5638 |
// Save the original VT of the compare. |
| 5639 |
MVT CmpVT = N0.getSimpleValueType(); |
5639 |
MVT CmpVT = N0.getSimpleValueType(); |
| 5640 |
|
5640 |
|
| 5641 |
// If we are comparing (and (shr X, C, Mask) with 0, emit a BEXTR followed |
5641 |
// If we are comparing (and (shr X, C, Mask) with 0, emit a BEXTR followed |
| 5642 |
// by a test instruction. The test should be removed later by |
5642 |
// by a test instruction. The test should be removed later by |
| 5643 |
// analyzeCompare if we are using only the zero flag. |
5643 |
// analyzeCompare if we are using only the zero flag. |
| 5644 |
// TODO: Should we check the users and use the BEXTR flags directly? |
5644 |
// TODO: Should we check the users and use the BEXTR flags directly? |
| 5645 |
if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { |
5645 |
if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { |
| 5646 |
if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) { |
5646 |
if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) { |
| 5647 |
unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr |
5647 |
unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr |
| 5648 |
: X86::TEST32rr; |
5648 |
: X86::TEST32rr; |
| 5649 |
SDValue BEXTR = SDValue(NewNode, 0); |
5649 |
SDValue BEXTR = SDValue(NewNode, 0); |
| 5650 |
NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR); |
5650 |
NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR); |
| 5651 |
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); |
5651 |
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); |
| 5652 |
CurDAG->RemoveDeadNode(Node); |
5652 |
CurDAG->RemoveDeadNode(Node); |
| 5653 |
return; |
5653 |
return; |
| 5654 |
} |
5654 |
} |
| 5655 |
} |
5655 |
} |
| 5656 |
|
5656 |
|
| 5657 |
// We can peek through truncates, but we need to be careful below. |
5657 |
// We can peek through truncates, but we need to be careful below. |
| 5658 |
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) |
5658 |
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) |
| 5659 |
N0 = N0.getOperand(0); |
5659 |
N0 = N0.getOperand(0); |
| 5660 |
|
5660 |
|
| 5661 |
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to |
5661 |
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to |
| 5662 |
// use a smaller encoding. |
5662 |
// use a smaller encoding. |
| 5663 |
// Look past the truncate if CMP is the only use of it. |
5663 |
// Look past the truncate if CMP is the only use of it. |
| 5664 |
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && |
5664 |
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && |
| 5665 |
N0.getValueType() != MVT::i8) { |
5665 |
N0.getValueType() != MVT::i8) { |
| 5666 |
auto *MaskC = dyn_cast(N0.getOperand(1)); |
5666 |
auto *MaskC = dyn_cast(N0.getOperand(1)); |
| 5667 |
if (!MaskC) |
5667 |
if (!MaskC) |
| 5668 |
break; |
5668 |
break; |
| 5669 |
|
5669 |
|
| 5670 |
// We may have looked through a truncate so mask off any bits that |
5670 |
// We may have looked through a truncate so mask off any bits that |
| 5671 |
// shouldn't be part of the compare. |
5671 |
// shouldn't be part of the compare. |
| 5672 |
uint64_t Mask = MaskC->getZExtValue(); |
5672 |
uint64_t Mask = MaskC->getZExtValue(); |
| 5673 |
Mask &= maskTrailingOnes(CmpVT.getScalarSizeInBits()); |
5673 |
Mask &= maskTrailingOnes(CmpVT.getScalarSizeInBits()); |
| 5674 |
|
5674 |
|
| 5675 |
// Check if we can replace AND+IMM{32,64} with a shift. This is possible |
5675 |
// Check if we can replace AND+IMM{32,64} with a shift. This is possible |
| 5676 |
// for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the |
5676 |
// for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the |
| 5677 |
// zero flag. |
5677 |
// zero flag. |
| 5678 |
if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) && |
5678 |
if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) && |
| 5679 |
onlyUsesZeroFlag(SDValue(Node, 0))) { |
5679 |
onlyUsesZeroFlag(SDValue(Node, 0))) { |
| 5680 |
unsigned ShiftOpcode = ISD::DELETED_NODE; |
5680 |
unsigned ShiftOpcode = ISD::DELETED_NODE; |
| 5681 |
unsigned ShiftAmt; |
5681 |
unsigned ShiftAmt; |
| 5682 |
unsigned SubRegIdx; |
5682 |
unsigned SubRegIdx; |
| 5683 |
MVT SubRegVT; |
5683 |
MVT SubRegVT; |
| 5684 |
unsigned TestOpcode; |
5684 |
unsigned TestOpcode; |
| 5685 |
unsigned LeadingZeros = llvm::countl_zero(Mask); |
5685 |
unsigned LeadingZeros = llvm::countl_zero(Mask); |
| 5686 |
unsigned TrailingZeros = llvm::countr_zero(Mask); |
5686 |
unsigned TrailingZeros = llvm::countr_zero(Mask); |
| 5687 |
|
5687 |
|
| 5688 |
// With leading/trailing zeros, the transform is profitable if we can |
5688 |
// With leading/trailing zeros, the transform is profitable if we can |
| 5689 |
// eliminate a movabsq or shrink a 32-bit immediate to 8-bit without |
5689 |
// eliminate a movabsq or shrink a 32-bit immediate to 8-bit without |
| 5690 |
// incurring any extra register moves. |
5690 |
// incurring any extra register moves. |
| 5691 |
bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse(); |
5691 |
bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse(); |
| 5692 |
if (LeadingZeros == 0 && SavesBytes) { |
5692 |
if (LeadingZeros == 0 && SavesBytes) { |
| 5693 |
// If the mask covers the most significant bit, then we can replace |
5693 |
// If the mask covers the most significant bit, then we can replace |
| 5694 |
// TEST+AND with a SHR and check eflags. |
5694 |
// TEST+AND with a SHR and check eflags. |
| 5695 |
// This emits a redundant TEST which is subsequently eliminated. |
5695 |
// This emits a redundant TEST which is subsequently eliminated. |
| 5696 |
ShiftOpcode = X86::SHR64ri; |
5696 |
ShiftOpcode = X86::SHR64ri; |
| 5697 |
ShiftAmt = TrailingZeros; |
5697 |
ShiftAmt = TrailingZeros; |
| 5698 |
SubRegIdx = 0; |
5698 |
SubRegIdx = 0; |
| 5699 |
TestOpcode = X86::TEST64rr; |
5699 |
TestOpcode = X86::TEST64rr; |
| 5700 |
} else if (TrailingZeros == 0 && SavesBytes) { |
5700 |
} else if (TrailingZeros == 0 && SavesBytes) { |
| 5701 |
// If the mask covers the least significant bit, then we can replace |
5701 |
// If the mask covers the least significant bit, then we can replace |
| 5702 |
// TEST+AND with a SHL and check eflags. |
5702 |
// TEST+AND with a SHL and check eflags. |
| 5703 |
// This emits a redundant TEST which is subsequently eliminated. |
5703 |
// This emits a redundant TEST which is subsequently eliminated. |
| 5704 |
ShiftOpcode = X86::SHL64ri; |
5704 |
ShiftOpcode = X86::SHL64ri; |
| 5705 |
ShiftAmt = LeadingZeros; |
5705 |
ShiftAmt = LeadingZeros; |
| 5706 |
SubRegIdx = 0; |
5706 |
SubRegIdx = 0; |
| 5707 |
TestOpcode = X86::TEST64rr; |
5707 |
TestOpcode = X86::TEST64rr; |
| 5708 |
} else if (MaskC->hasOneUse() && !isInt<32>(Mask)) { |
5708 |
} else if (MaskC->hasOneUse() && !isInt<32>(Mask)) { |
| 5709 |
// If the shifted mask extends into the high half and is 8/16/32 bits |
5709 |
// If the shifted mask extends into the high half and is 8/16/32 bits |
| 5710 |
// wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr. |
5710 |
// wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr. |
| 5711 |
unsigned PopCount = 64 - LeadingZeros - TrailingZeros; |
5711 |
unsigned PopCount = 64 - LeadingZeros - TrailingZeros; |
| 5712 |
if (PopCount == 8) { |
5712 |
if (PopCount == 8) { |
| 5713 |
ShiftOpcode = X86::SHR64ri; |
5713 |
ShiftOpcode = X86::SHR64ri; |
| 5714 |
ShiftAmt = TrailingZeros; |
5714 |
ShiftAmt = TrailingZeros; |
| 5715 |
SubRegIdx = X86::sub_8bit; |
5715 |
SubRegIdx = X86::sub_8bit; |
| 5716 |
SubRegVT = MVT::i8; |
5716 |
SubRegVT = MVT::i8; |
| 5717 |
TestOpcode = X86::TEST8rr; |
5717 |
TestOpcode = X86::TEST8rr; |
| 5718 |
} else if (PopCount == 16) { |
5718 |
} else if (PopCount == 16) { |
| 5719 |
ShiftOpcode = X86::SHR64ri; |
5719 |
ShiftOpcode = X86::SHR64ri; |
| 5720 |
ShiftAmt = TrailingZeros; |
5720 |
ShiftAmt = TrailingZeros; |
| 5721 |
SubRegIdx = X86::sub_16bit; |
5721 |
SubRegIdx = X86::sub_16bit; |
| 5722 |
SubRegVT = MVT::i16; |
5722 |
SubRegVT = MVT::i16; |
| 5723 |
TestOpcode = X86::TEST16rr; |
5723 |
TestOpcode = X86::TEST16rr; |
| 5724 |
} else if (PopCount == 32) { |
5724 |
} else if (PopCount == 32) { |
| 5725 |
ShiftOpcode = X86::SHR64ri; |
5725 |
ShiftOpcode = X86::SHR64ri; |
| 5726 |
ShiftAmt = TrailingZeros; |
5726 |
ShiftAmt = TrailingZeros; |
| 5727 |
SubRegIdx = X86::sub_32bit; |
5727 |
SubRegIdx = X86::sub_32bit; |
| 5728 |
SubRegVT = MVT::i32; |
5728 |
SubRegVT = MVT::i32; |
| 5729 |
TestOpcode = X86::TEST32rr; |
5729 |
TestOpcode = X86::TEST32rr; |
| 5730 |
} |
5730 |
} |
| 5731 |
} |
5731 |
} |
| 5732 |
if (ShiftOpcode != ISD::DELETED_NODE) { |
5732 |
if (ShiftOpcode != ISD::DELETED_NODE) { |
| 5733 |
SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64); |
5733 |
SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64); |
| 5734 |
SDValue Shift = SDValue( |
5734 |
SDValue Shift = SDValue( |
| 5735 |
CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32, |
5735 |
CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32, |
| 5736 |
N0.getOperand(0), ShiftC), |
5736 |
N0.getOperand(0), ShiftC), |
| 5737 |
0); |
5737 |
0); |
| 5738 |
if (SubRegIdx != 0) { |
5738 |
if (SubRegIdx != 0) { |
| 5739 |
Shift = |
5739 |
Shift = |
| 5740 |
CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift); |
5740 |
CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift); |
| 5741 |
} |
5741 |
} |
| 5742 |
MachineSDNode *Test = |
5742 |
MachineSDNode *Test = |
| 5743 |
CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift); |
5743 |
CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift); |
| 5744 |
ReplaceNode(Node, Test); |
5744 |
ReplaceNode(Node, Test); |
| 5745 |
return; |
5745 |
return; |
| 5746 |
} |
5746 |
} |
| 5747 |
} |
5747 |
} |
| 5748 |
|
5748 |
|
| 5749 |
MVT VT; |
5749 |
MVT VT; |
| 5750 |
int SubRegOp; |
5750 |
int SubRegOp; |
| 5751 |
unsigned ROpc, MOpc; |
5751 |
unsigned ROpc, MOpc; |
| 5752 |
|
5752 |
|
| 5753 |
// For each of these checks we need to be careful if the sign flag is |
5753 |
// For each of these checks we need to be careful if the sign flag is |
| 5754 |
// being used. It is only safe to use the sign flag in two conditions, |
5754 |
// being used. It is only safe to use the sign flag in two conditions, |
| 5755 |
// either the sign bit in the shrunken mask is zero or the final test |
5755 |
// either the sign bit in the shrunken mask is zero or the final test |
| 5756 |
// size is equal to the original compare size. |
5756 |
// size is equal to the original compare size. |
| 5757 |
|
5757 |
|
| 5758 |
if (isUInt<8>(Mask) && |
5758 |
if (isUInt<8>(Mask) && |
| 5759 |
(!(Mask & 0x80) || CmpVT == MVT::i8 || |
5759 |
(!(Mask & 0x80) || CmpVT == MVT::i8 || |
| 5760 |
hasNoSignFlagUses(SDValue(Node, 0)))) { |
5760 |
hasNoSignFlagUses(SDValue(Node, 0)))) { |
| 5761 |
// For example, convert "testl %eax, $8" to "testb %al, $8" |
5761 |
// For example, convert "testl %eax, $8" to "testb %al, $8" |
| 5762 |
VT = MVT::i8; |
5762 |
VT = MVT::i8; |
| 5763 |
SubRegOp = X86::sub_8bit; |
5763 |
SubRegOp = X86::sub_8bit; |
| 5764 |
ROpc = X86::TEST8ri; |
5764 |
ROpc = X86::TEST8ri; |
| 5765 |
MOpc = X86::TEST8mi; |
5765 |
MOpc = X86::TEST8mi; |
| 5766 |
} else if (OptForMinSize && isUInt<16>(Mask) && |
5766 |
} else if (OptForMinSize && isUInt<16>(Mask) && |
| 5767 |
(!(Mask & 0x8000) || CmpVT == MVT::i16 || |
5767 |
(!(Mask & 0x8000) || CmpVT == MVT::i16 || |
| 5768 |
hasNoSignFlagUses(SDValue(Node, 0)))) { |
5768 |
hasNoSignFlagUses(SDValue(Node, 0)))) { |
| 5769 |
// For example, "testl %eax, $32776" to "testw %ax, $32776". |
5769 |
// For example, "testl %eax, $32776" to "testw %ax, $32776". |
| 5770 |
// NOTE: We only want to form TESTW instructions if optimizing for |
5770 |
// NOTE: We only want to form TESTW instructions if optimizing for |
| 5771 |
// min size. Otherwise we only save one byte and possibly get a length |
5771 |
// min size. Otherwise we only save one byte and possibly get a length |
| 5772 |
// changing prefix penalty in the decoders. |
5772 |
// changing prefix penalty in the decoders. |
| 5773 |
VT = MVT::i16; |
5773 |
VT = MVT::i16; |
| 5774 |
SubRegOp = X86::sub_16bit; |
5774 |
SubRegOp = X86::sub_16bit; |
| 5775 |
ROpc = X86::TEST16ri; |
5775 |
ROpc = X86::TEST16ri; |
| 5776 |
MOpc = X86::TEST16mi; |
5776 |
MOpc = X86::TEST16mi; |
| 5777 |
} else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 && |
5777 |
} else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 && |
| 5778 |
((!(Mask & 0x80000000) && |
5778 |
((!(Mask & 0x80000000) && |
| 5779 |
// Without minsize 16-bit Cmps can get here so we need to |
5779 |
// Without minsize 16-bit Cmps can get here so we need to |
| 5780 |
// be sure we calculate the correct sign flag if needed. |
5780 |
// be sure we calculate the correct sign flag if needed. |
| 5781 |
(CmpVT != MVT::i16 || !(Mask & 0x8000))) || |
5781 |
(CmpVT != MVT::i16 || !(Mask & 0x8000))) || |
| 5782 |
CmpVT == MVT::i32 || |
5782 |
CmpVT == MVT::i32 || |
| 5783 |
hasNoSignFlagUses(SDValue(Node, 0)))) { |
5783 |
hasNoSignFlagUses(SDValue(Node, 0)))) { |
| 5784 |
// For example, "testq %rax, $268468232" to "testl %eax, $268468232". |
5784 |
// For example, "testq %rax, $268468232" to "testl %eax, $268468232". |
| 5785 |
// NOTE: We only want to run that transform if N0 is 32 or 64 bits. |
5785 |
// NOTE: We only want to run that transform if N0 is 32 or 64 bits. |
| 5786 |
// Otherwize, we find ourselves in a position where we have to do |
5786 |
// Otherwize, we find ourselves in a position where we have to do |
| 5787 |
// promotion. If previous passes did not promote the and, we assume |
5787 |
// promotion. If previous passes did not promote the and, we assume |
| 5788 |
// they had a good reason not to and do not promote here. |
5788 |
// they had a good reason not to and do not promote here. |
| 5789 |
VT = MVT::i32; |
5789 |
VT = MVT::i32; |
| 5790 |
SubRegOp = X86::sub_32bit; |
5790 |
SubRegOp = X86::sub_32bit; |
| 5791 |
ROpc = X86::TEST32ri; |
5791 |
ROpc = X86::TEST32ri; |
| 5792 |
MOpc = X86::TEST32mi; |
5792 |
MOpc = X86::TEST32mi; |
| 5793 |
} else { |
5793 |
} else { |
| 5794 |
// No eligible transformation was found. |
5794 |
// No eligible transformation was found. |
| 5795 |
break; |
5795 |
break; |
| 5796 |
} |
5796 |
} |
| 5797 |
|
5797 |
|
| 5798 |
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT); |
5798 |
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT); |
| 5799 |
SDValue Reg = N0.getOperand(0); |
5799 |
SDValue Reg = N0.getOperand(0); |
| 5800 |
|
5800 |
|
| 5801 |
// Emit a testl or testw. |
5801 |
// Emit a testl or testw. |
| 5802 |
MachineSDNode *NewNode; |
5802 |
MachineSDNode *NewNode; |
| 5803 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
5803 |
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
| 5804 |
if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
5804 |
if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
| 5805 |
if (auto *LoadN = dyn_cast(N0.getOperand(0).getNode())) { |
5805 |
if (auto *LoadN = dyn_cast(N0.getOperand(0).getNode())) { |
| 5806 |
if (!LoadN->isSimple()) { |
5806 |
if (!LoadN->isSimple()) { |
| 5807 |
unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits(); |
5807 |
unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits(); |
| 5808 |
if ((MOpc == X86::TEST8mi && NumVolBits != 8) || |
5808 |
if ((MOpc == X86::TEST8mi && NumVolBits != 8) || |
| 5809 |
(MOpc == X86::TEST16mi && NumVolBits != 16) || |
5809 |
(MOpc == X86::TEST16mi && NumVolBits != 16) || |
| 5810 |
(MOpc == X86::TEST32mi && NumVolBits != 32)) |
5810 |
(MOpc == X86::TEST32mi && NumVolBits != 32)) |
| 5811 |
break; |
5811 |
break; |
| 5812 |
} |
5812 |
} |
| 5813 |
} |
5813 |
} |
| 5814 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, |
5814 |
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, |
| 5815 |
Reg.getOperand(0) }; |
5815 |
Reg.getOperand(0) }; |
| 5816 |
NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops); |
5816 |
NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops); |
| 5817 |
// Update the chain. |
5817 |
// Update the chain. |
| 5818 |
ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1)); |
5818 |
ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1)); |
| 5819 |
// Record the mem-refs |
5819 |
// Record the mem-refs |
| 5820 |
CurDAG->setNodeMemRefs(NewNode, |
5820 |
CurDAG->setNodeMemRefs(NewNode, |
| 5821 |
{cast(Reg)->getMemOperand()}); |
5821 |
{cast(Reg)->getMemOperand()}); |
| 5822 |
} else { |
5822 |
} else { |
| 5823 |
// Extract the subregister if necessary. |
5823 |
// Extract the subregister if necessary. |
| 5824 |
if (N0.getValueType() != VT) |
5824 |
if (N0.getValueType() != VT) |
| 5825 |
Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg); |
5825 |
Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg); |
| 5826 |
|
5826 |
|
| 5827 |
NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm); |
5827 |
NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm); |
| 5828 |
} |
5828 |
} |
| 5829 |
// Replace CMP with TEST. |
5829 |
// Replace CMP with TEST. |
| 5830 |
ReplaceNode(Node, NewNode); |
5830 |
ReplaceNode(Node, NewNode); |
| 5831 |
return; |
5831 |
return; |
| 5832 |
} |
5832 |
} |
| 5833 |
break; |
5833 |
break; |
| 5834 |
} |
5834 |
} |
| 5835 |
case X86ISD::PCMPISTR: { |
5835 |
case X86ISD::PCMPISTR: { |
| 5836 |
if (!Subtarget->hasSSE42()) |
5836 |
if (!Subtarget->hasSSE42()) |
| 5837 |
break; |
5837 |
break; |
| 5838 |
|
5838 |
|
| 5839 |
bool NeedIndex = !SDValue(Node, 0).use_empty(); |
5839 |
bool NeedIndex = !SDValue(Node, 0).use_empty(); |
| 5840 |
bool NeedMask = !SDValue(Node, 1).use_empty(); |
5840 |
bool NeedMask = !SDValue(Node, 1).use_empty(); |
| 5841 |
// We can't fold a load if we are going to make two instructions. |
5841 |
// We can't fold a load if we are going to make two instructions. |
| 5842 |
bool MayFoldLoad = !NeedIndex || !NeedMask; |
5842 |
bool MayFoldLoad = !NeedIndex || !NeedMask; |
| 5843 |
|
5843 |
|
| 5844 |
MachineSDNode *CNode; |
5844 |
MachineSDNode *CNode; |
| 5845 |
if (NeedMask) { |
5845 |
if (NeedMask) { |
| 5846 |
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr; |
5846 |
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr; |
| 5847 |
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm; |
5847 |
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm; |
| 5848 |
CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node); |
5848 |
CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node); |
| 5849 |
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); |
5849 |
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); |
| 5850 |
} |
5850 |
} |
| 5851 |
if (NeedIndex || !NeedMask) { |
5851 |
if (NeedIndex || !NeedMask) { |
| 5852 |
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr; |
5852 |
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr; |
| 5853 |
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm; |
5853 |
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm; |
| 5854 |
CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node); |
5854 |
CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node); |
| 5855 |
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
5855 |
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
| 5856 |
} |
5856 |
} |
| 5857 |
|
5857 |
|
| 5858 |
// Connect the flag usage to the last instruction created. |
5858 |
// Connect the flag usage to the last instruction created. |
| 5859 |
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); |
5859 |
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); |
| 5860 |
CurDAG->RemoveDeadNode(Node); |
5860 |
CurDAG->RemoveDeadNode(Node); |
| 5861 |
return; |
5861 |
return; |
| 5862 |
} |
5862 |
} |
| 5863 |
case X86ISD::PCMPESTR: { |
5863 |
case X86ISD::PCMPESTR: { |
| 5864 |
if (!Subtarget->hasSSE42()) |
5864 |
if (!Subtarget->hasSSE42()) |
| 5865 |
break; |
5865 |
break; |
| 5866 |
|
5866 |
|
| 5867 |
// Copy the two implicit register inputs. |
5867 |
// Copy the two implicit register inputs. |
| 5868 |
SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX, |
5868 |
SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX, |
| 5869 |
Node->getOperand(1), |
5869 |
Node->getOperand(1), |
| 5870 |
SDValue()).getValue(1); |
5870 |
SDValue()).getValue(1); |
| 5871 |
InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, |
5871 |
InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, |
| 5872 |
Node->getOperand(3), InGlue).getValue(1); |
5872 |
Node->getOperand(3), InGlue).getValue(1); |
| 5873 |
|
5873 |
|
| 5874 |
bool NeedIndex = !SDValue(Node, 0).use_empty(); |
5874 |
bool NeedIndex = !SDValue(Node, 0).use_empty(); |
| 5875 |
bool NeedMask = !SDValue(Node, 1).use_empty(); |
5875 |
bool NeedMask = !SDValue(Node, 1).use_empty(); |
| 5876 |
// We can't fold a load if we are going to make two instructions. |
5876 |
// We can't fold a load if we are going to make two instructions. |
| 5877 |
bool MayFoldLoad = !NeedIndex || !NeedMask; |
5877 |
bool MayFoldLoad = !NeedIndex || !NeedMask; |
| 5878 |
|
5878 |
|
| 5879 |
MachineSDNode *CNode; |
5879 |
MachineSDNode *CNode; |
| 5880 |
if (NeedMask) { |
5880 |
if (NeedMask) { |
| 5881 |
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr; |
5881 |
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr; |
| 5882 |
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm; |
5882 |
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm; |
| 5883 |
CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node, |
5883 |
CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node, |
| 5884 |
InGlue); |
5884 |
InGlue); |
| 5885 |
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); |
5885 |
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); |
| 5886 |
} |
5886 |
} |
| 5887 |
if (NeedIndex || !NeedMask) { |
5887 |
if (NeedIndex || !NeedMask) { |
| 5888 |
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr; |
5888 |
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr; |
| 5889 |
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm; |
5889 |
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm; |
| 5890 |
CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InGlue); |
5890 |
CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InGlue); |
| 5891 |
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
5891 |
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
| 5892 |
} |
5892 |
} |
| 5893 |
// Connect the flag usage to the last instruction created. |
5893 |
// Connect the flag usage to the last instruction created. |
| 5894 |
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); |
5894 |
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); |
| 5895 |
CurDAG->RemoveDeadNode(Node); |
5895 |
CurDAG->RemoveDeadNode(Node); |
| 5896 |
return; |
5896 |
return; |
| 5897 |
} |
5897 |
} |
| 5898 |
|
5898 |
|
| 5899 |
case ISD::SETCC: { |
5899 |
case ISD::SETCC: { |
| 5900 |
if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue())) |
5900 |
if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue())) |
| 5901 |
return; |
5901 |
return; |
| 5902 |
|
5902 |
|
| 5903 |
break; |
5903 |
break; |
| 5904 |
} |
5904 |
} |
| 5905 |
|
5905 |
|
| 5906 |
case ISD::STORE: |
5906 |
case ISD::STORE: |
| 5907 |
if (foldLoadStoreIntoMemOperand(Node)) |
5907 |
if (foldLoadStoreIntoMemOperand(Node)) |
| 5908 |
return; |
5908 |
return; |
| 5909 |
break; |
5909 |
break; |
| 5910 |
|
5910 |
|
| 5911 |
case X86ISD::SETCC_CARRY: { |
5911 |
case X86ISD::SETCC_CARRY: { |
| 5912 |
MVT VT = Node->getSimpleValueType(0); |
5912 |
MVT VT = Node->getSimpleValueType(0); |
| 5913 |
SDValue Result; |
5913 |
SDValue Result; |
| 5914 |
if (Subtarget->hasSBBDepBreaking()) { |
5914 |
if (Subtarget->hasSBBDepBreaking()) { |
| 5915 |
// We have to do this manually because tblgen will put the eflags copy in |
5915 |
// We have to do this manually because tblgen will put the eflags copy in |
| 5916 |
// the wrong place if we use an extract_subreg in the pattern. |
5916 |
// the wrong place if we use an extract_subreg in the pattern. |
| 5917 |
// Copy flags to the EFLAGS register and glue it to next node. |
5917 |
// Copy flags to the EFLAGS register and glue it to next node. |
| 5918 |
SDValue EFLAGS = |
5918 |
SDValue EFLAGS = |
| 5919 |
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, |
5919 |
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, |
| 5920 |
Node->getOperand(1), SDValue()); |
5920 |
Node->getOperand(1), SDValue()); |
| 5921 |
|
5921 |
|
| 5922 |
// Create a 64-bit instruction if the result is 64-bits otherwise use the |
5922 |
// Create a 64-bit instruction if the result is 64-bits otherwise use the |
| 5923 |
// 32-bit version. |
5923 |
// 32-bit version. |
| 5924 |
unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r; |
5924 |
unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r; |
| 5925 |
MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; |
5925 |
MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; |
| 5926 |
Result = SDValue( |
5926 |
Result = SDValue( |
| 5927 |
CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), |
5927 |
CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), |
| 5928 |
0); |
5928 |
0); |
| 5929 |
} else { |
5929 |
} else { |
| 5930 |
// The target does not recognize sbb with the same reg operand as a |
5930 |
// The target does not recognize sbb with the same reg operand as a |
| 5931 |
// no-source idiom, so we explicitly zero the input values. |
5931 |
// no-source idiom, so we explicitly zero the input values. |
| 5932 |
Result = getSBBZero(Node); |
5932 |
Result = getSBBZero(Node); |
| 5933 |
} |
5933 |
} |
| 5934 |
|
5934 |
|
| 5935 |
// For less than 32-bits we need to extract from the 32-bit node. |
5935 |
// For less than 32-bits we need to extract from the 32-bit node. |
| 5936 |
if (VT == MVT::i8 || VT == MVT::i16) { |
5936 |
if (VT == MVT::i8 || VT == MVT::i16) { |
| 5937 |
int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; |
5937 |
int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; |
| 5938 |
Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); |
5938 |
Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); |
| 5939 |
} |
5939 |
} |
| 5940 |
|
5940 |
|
| 5941 |
ReplaceUses(SDValue(Node, 0), Result); |
5941 |
ReplaceUses(SDValue(Node, 0), Result); |
| 5942 |
CurDAG->RemoveDeadNode(Node); |
5942 |
CurDAG->RemoveDeadNode(Node); |
| 5943 |
return; |
5943 |
return; |
| 5944 |
} |
5944 |
} |
| 5945 |
case X86ISD::SBB: { |
5945 |
case X86ISD::SBB: { |
| 5946 |
if (isNullConstant(Node->getOperand(0)) && |
5946 |
if (isNullConstant(Node->getOperand(0)) && |
| 5947 |
isNullConstant(Node->getOperand(1))) { |
5947 |
isNullConstant(Node->getOperand(1))) { |
| 5948 |
SDValue Result = getSBBZero(Node); |
5948 |
SDValue Result = getSBBZero(Node); |
| 5949 |
|
5949 |
|
| 5950 |
// Replace the flag use. |
5950 |
// Replace the flag use. |
| 5951 |
ReplaceUses(SDValue(Node, 1), Result.getValue(1)); |
5951 |
ReplaceUses(SDValue(Node, 1), Result.getValue(1)); |
| 5952 |
|
5952 |
|
| 5953 |
// Replace the result use. |
5953 |
// Replace the result use. |
| 5954 |
if (!SDValue(Node, 0).use_empty()) { |
5954 |
if (!SDValue(Node, 0).use_empty()) { |
| 5955 |
// For less than 32-bits we need to extract from the 32-bit node. |
5955 |
// For less than 32-bits we need to extract from the 32-bit node. |
| 5956 |
MVT VT = Node->getSimpleValueType(0); |
5956 |
MVT VT = Node->getSimpleValueType(0); |
| 5957 |
if (VT == MVT::i8 || VT == MVT::i16) { |
5957 |
if (VT == MVT::i8 || VT == MVT::i16) { |
| 5958 |
int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; |
5958 |
int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; |
| 5959 |
Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); |
5959 |
Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); |
| 5960 |
} |
5960 |
} |
| 5961 |
ReplaceUses(SDValue(Node, 0), Result); |
5961 |
ReplaceUses(SDValue(Node, 0), Result); |
| 5962 |
} |
5962 |
} |
| 5963 |
|
5963 |
|
| 5964 |
CurDAG->RemoveDeadNode(Node); |
5964 |
CurDAG->RemoveDeadNode(Node); |
| 5965 |
return; |
5965 |
return; |
| 5966 |
} |
5966 |
} |
| 5967 |
break; |
5967 |
break; |
| 5968 |
} |
5968 |
} |
| 5969 |
case X86ISD::MGATHER: { |
5969 |
case X86ISD::MGATHER: { |
| 5970 |
auto *Mgt = cast(Node); |
5970 |
auto *Mgt = cast(Node); |
| 5971 |
SDValue IndexOp = Mgt->getIndex(); |
5971 |
SDValue IndexOp = Mgt->getIndex(); |
| 5972 |
SDValue Mask = Mgt->getMask(); |
5972 |
SDValue Mask = Mgt->getMask(); |
| 5973 |
MVT IndexVT = IndexOp.getSimpleValueType(); |
5973 |
MVT IndexVT = IndexOp.getSimpleValueType(); |
| 5974 |
MVT ValueVT = Node->getSimpleValueType(0); |
5974 |
MVT ValueVT = Node->getSimpleValueType(0); |
| 5975 |
MVT MaskVT = Mask.getSimpleValueType(); |
5975 |
MVT MaskVT = Mask.getSimpleValueType(); |
| 5976 |
|
5976 |
|
| 5977 |
// This is just to prevent crashes if the nodes are malformed somehow. We're |
5977 |
// This is just to prevent crashes if the nodes are malformed somehow. We're |
| 5978 |
// otherwise only doing loose type checking in here based on type what |
5978 |
// otherwise only doing loose type checking in here based on type what |
| 5979 |
// a type constraint would say just like table based isel. |
5979 |
// a type constraint would say just like table based isel. |
| 5980 |
if (!ValueVT.isVector() || !MaskVT.isVector()) |
5980 |
if (!ValueVT.isVector() || !MaskVT.isVector()) |
| 5981 |
break; |
5981 |
break; |
| 5982 |
|
5982 |
|
| 5983 |
unsigned NumElts = ValueVT.getVectorNumElements(); |
5983 |
unsigned NumElts = ValueVT.getVectorNumElements(); |
| 5984 |
MVT ValueSVT = ValueVT.getVectorElementType(); |
5984 |
MVT ValueSVT = ValueVT.getVectorElementType(); |
| 5985 |
|
5985 |
|
| 5986 |
bool IsFP = ValueSVT.isFloatingPoint(); |
5986 |
bool IsFP = ValueSVT.isFloatingPoint(); |
| 5987 |
unsigned EltSize = ValueSVT.getSizeInBits(); |
5987 |
unsigned EltSize = ValueSVT.getSizeInBits(); |
| 5988 |
|
5988 |
|
| 5989 |
unsigned Opc = 0; |
5989 |
unsigned Opc = 0; |
| 5990 |
bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1; |
5990 |
bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1; |
| 5991 |
if (AVX512Gather) { |
5991 |
if (AVX512Gather) { |
| 5992 |
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) |
5992 |
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) |
| 5993 |
Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm; |
5993 |
Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm; |
| 5994 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) |
5994 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) |
| 5995 |
Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm; |
5995 |
Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm; |
| 5996 |
else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) |
5996 |
else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) |
| 5997 |
Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm; |
5997 |
Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm; |
| 5998 |
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) |
5998 |
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) |
| 5999 |
Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm; |
5999 |
Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm; |
| 6000 |
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) |
6000 |
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) |
| 6001 |
Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm; |
6001 |
Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm; |
| 6002 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) |
6002 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) |
| 6003 |
Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm; |
6003 |
Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm; |
| 6004 |
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) |
6004 |
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) |
| 6005 |
Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm; |
6005 |
Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm; |
| 6006 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) |
6006 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) |
| 6007 |
Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm; |
6007 |
Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm; |
| 6008 |
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) |
6008 |
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) |
| 6009 |
Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm; |
6009 |
Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm; |
| 6010 |
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) |
6010 |
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) |
| 6011 |
Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm; |
6011 |
Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm; |
| 6012 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) |
6012 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) |
| 6013 |
Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm; |
6013 |
Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm; |
| 6014 |
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) |
6014 |
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) |
| 6015 |
Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm; |
6015 |
Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm; |
| 6016 |
} else { |
6016 |
} else { |
| 6017 |
assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() && |
6017 |
assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() && |
| 6018 |
"Unexpected mask VT!"); |
6018 |
"Unexpected mask VT!"); |
| 6019 |
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) |
6019 |
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) |
| 6020 |
Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm; |
6020 |
Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm; |
| 6021 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) |
6021 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) |
| 6022 |
Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm; |
6022 |
Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm; |
| 6023 |
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) |
6023 |
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) |
| 6024 |
Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm; |
6024 |
Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm; |
| 6025 |
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) |
6025 |
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) |
| 6026 |
Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm; |
6026 |
Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm; |
| 6027 |
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) |
6027 |
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) |
| 6028 |
Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm; |
6028 |
Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm; |
| 6029 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) |
6029 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) |
| 6030 |
Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm; |
6030 |
Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm; |
| 6031 |
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) |
6031 |
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) |
| 6032 |
Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm; |
6032 |
Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm; |
| 6033 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) |
6033 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) |
| 6034 |
Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm; |
6034 |
Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm; |
| 6035 |
} |
6035 |
} |
| 6036 |
|
6036 |
|
| 6037 |
if (!Opc) |
6037 |
if (!Opc) |
| 6038 |
break; |
6038 |
break; |
| 6039 |
|
6039 |
|
| 6040 |
SDValue Base, Scale, Index, Disp, Segment; |
6040 |
SDValue Base, Scale, Index, Disp, Segment; |
| 6041 |
if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(), |
6041 |
if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(), |
| 6042 |
Base, Scale, Index, Disp, Segment)) |
6042 |
Base, Scale, Index, Disp, Segment)) |
| 6043 |
break; |
6043 |
break; |
| 6044 |
|
6044 |
|
| 6045 |
SDValue PassThru = Mgt->getPassThru(); |
6045 |
SDValue PassThru = Mgt->getPassThru(); |
| 6046 |
SDValue Chain = Mgt->getChain(); |
6046 |
SDValue Chain = Mgt->getChain(); |
| 6047 |
// Gather instructions have a mask output not in the ISD node. |
6047 |
// Gather instructions have a mask output not in the ISD node. |
| 6048 |
SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other); |
6048 |
SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other); |
| 6049 |
|
6049 |
|
| 6050 |
MachineSDNode *NewNode; |
6050 |
MachineSDNode *NewNode; |
| 6051 |
if (AVX512Gather) { |
6051 |
if (AVX512Gather) { |
| 6052 |
SDValue Ops[] = {PassThru, Mask, Base, Scale, |
6052 |
SDValue Ops[] = {PassThru, Mask, Base, Scale, |
| 6053 |
Index, Disp, Segment, Chain}; |
6053 |
Index, Disp, Segment, Chain}; |
| 6054 |
NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); |
6054 |
NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); |
| 6055 |
} else { |
6055 |
} else { |
| 6056 |
SDValue Ops[] = {PassThru, Base, Scale, Index, |
6056 |
SDValue Ops[] = {PassThru, Base, Scale, Index, |
| 6057 |
Disp, Segment, Mask, Chain}; |
6057 |
Disp, Segment, Mask, Chain}; |
| 6058 |
NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); |
6058 |
NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); |
| 6059 |
} |
6059 |
} |
| 6060 |
CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()}); |
6060 |
CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()}); |
| 6061 |
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); |
6061 |
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); |
| 6062 |
ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2)); |
6062 |
ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2)); |
| 6063 |
CurDAG->RemoveDeadNode(Node); |
6063 |
CurDAG->RemoveDeadNode(Node); |
| 6064 |
return; |
6064 |
return; |
| 6065 |
} |
6065 |
} |
| 6066 |
case X86ISD::MSCATTER: { |
6066 |
case X86ISD::MSCATTER: { |
| 6067 |
auto *Sc = cast(Node); |
6067 |
auto *Sc = cast(Node); |
| 6068 |
SDValue Value = Sc->getValue(); |
6068 |
SDValue Value = Sc->getValue(); |
| 6069 |
SDValue IndexOp = Sc->getIndex(); |
6069 |
SDValue IndexOp = Sc->getIndex(); |
| 6070 |
MVT IndexVT = IndexOp.getSimpleValueType(); |
6070 |
MVT IndexVT = IndexOp.getSimpleValueType(); |
| 6071 |
MVT ValueVT = Value.getSimpleValueType(); |
6071 |
MVT ValueVT = Value.getSimpleValueType(); |
| 6072 |
|
6072 |
|
| 6073 |
// This is just to prevent crashes if the nodes are malformed somehow. We're |
6073 |
// This is just to prevent crashes if the nodes are malformed somehow. We're |
| 6074 |
// otherwise only doing loose type checking in here based on type what |
6074 |
// otherwise only doing loose type checking in here based on type what |
| 6075 |
// a type constraint would say just like table based isel. |
6075 |
// a type constraint would say just like table based isel. |
| 6076 |
if (!ValueVT.isVector()) |
6076 |
if (!ValueVT.isVector()) |
| 6077 |
break; |
6077 |
break; |
| 6078 |
|
6078 |
|
| 6079 |
unsigned NumElts = ValueVT.getVectorNumElements(); |
6079 |
unsigned NumElts = ValueVT.getVectorNumElements(); |
| 6080 |
MVT ValueSVT = ValueVT.getVectorElementType(); |
6080 |
MVT ValueSVT = ValueVT.getVectorElementType(); |
| 6081 |
|
6081 |
|
| 6082 |
bool IsFP = ValueSVT.isFloatingPoint(); |
6082 |
bool IsFP = ValueSVT.isFloatingPoint(); |
| 6083 |
unsigned EltSize = ValueSVT.getSizeInBits(); |
6083 |
unsigned EltSize = ValueSVT.getSizeInBits(); |
| 6084 |
|
6084 |
|
| 6085 |
unsigned Opc; |
6085 |
unsigned Opc; |
| 6086 |
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) |
6086 |
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) |
| 6087 |
Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr; |
6087 |
Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr; |
| 6088 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) |
6088 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) |
| 6089 |
Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr; |
6089 |
Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr; |
| 6090 |
else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) |
6090 |
else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) |
| 6091 |
Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr; |
6091 |
Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr; |
| 6092 |
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) |
6092 |
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) |
| 6093 |
Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr; |
6093 |
Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr; |
| 6094 |
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) |
6094 |
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) |
| 6095 |
Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr; |
6095 |
Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr; |
| 6096 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) |
6096 |
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) |
| 6097 |
Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr; |
6097 |
Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr; |
| 6098 |
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) |
6098 |
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) |
| 6099 |
Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr; |
6099 |
Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr; |
| 6100 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) |
6100 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) |
| 6101 |
Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr; |
6101 |
Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr; |
| 6102 |
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) |
6102 |
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) |
| 6103 |
Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr; |
6103 |
Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr; |
| 6104 |
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) |
6104 |
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) |
| 6105 |
Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr; |
6105 |
Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr; |
| 6106 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) |
6106 |
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) |
| 6107 |
Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr; |
6107 |
Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr; |
| 6108 |
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) |
6108 |
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) |
| 6109 |
Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr; |
6109 |
Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr; |
| 6110 |
else |
6110 |
else |
| 6111 |
break; |
6111 |
break; |
| 6112 |
|
6112 |
|
| 6113 |
SDValue Base, Scale, Index, Disp, Segment; |
6113 |
SDValue Base, Scale, Index, Disp, Segment; |
| 6114 |
if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(), |
6114 |
if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(), |
| 6115 |
Base, Scale, Index, Disp, Segment)) |
6115 |
Base, Scale, Index, Disp, Segment)) |
| 6116 |
break; |
6116 |
break; |
| 6117 |
|
6117 |
|
| 6118 |
SDValue Mask = Sc->getMask(); |
6118 |
SDValue Mask = Sc->getMask(); |
| 6119 |
SDValue Chain = Sc->getChain(); |
6119 |
SDValue Chain = Sc->getChain(); |
| 6120 |
// Scatter instructions have a mask output not in the ISD node. |
6120 |
// Scatter instructions have a mask output not in the ISD node. |
| 6121 |
SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other); |
6121 |
SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other); |
| 6122 |
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain}; |
6122 |
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain}; |
| 6123 |
|
6123 |
|
| 6124 |
MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); |
6124 |
MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); |
| 6125 |
CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()}); |
6125 |
CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()}); |
| 6126 |
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1)); |
6126 |
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1)); |
| 6127 |
CurDAG->RemoveDeadNode(Node); |
6127 |
CurDAG->RemoveDeadNode(Node); |
| 6128 |
return; |
6128 |
return; |
| 6129 |
} |
6129 |
} |
| 6130 |
case ISD::PREALLOCATED_SETUP: { |
6130 |
case ISD::PREALLOCATED_SETUP: { |
| 6131 |
auto *MFI = CurDAG->getMachineFunction().getInfo(); |
6131 |
auto *MFI = CurDAG->getMachineFunction().getInfo(); |
| 6132 |
auto CallId = MFI->getPreallocatedIdForCallSite( |
6132 |
auto CallId = MFI->getPreallocatedIdForCallSite( |
| 6133 |
cast(Node->getOperand(1))->getValue()); |
6133 |
cast(Node->getOperand(1))->getValue()); |
| 6134 |
SDValue Chain = Node->getOperand(0); |
6134 |
SDValue Chain = Node->getOperand(0); |
| 6135 |
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32); |
6135 |
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32); |
| 6136 |
MachineSDNode *New = CurDAG->getMachineNode( |
6136 |
MachineSDNode *New = CurDAG->getMachineNode( |
| 6137 |
TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain); |
6137 |
TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain); |
| 6138 |
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain |
6138 |
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain |
| 6139 |
CurDAG->RemoveDeadNode(Node); |
6139 |
CurDAG->RemoveDeadNode(Node); |
| 6140 |
return; |
6140 |
return; |
| 6141 |
} |
6141 |
} |
| 6142 |
case ISD::PREALLOCATED_ARG: { |
6142 |
case ISD::PREALLOCATED_ARG: { |
| 6143 |
auto *MFI = CurDAG->getMachineFunction().getInfo(); |
6143 |
auto *MFI = CurDAG->getMachineFunction().getInfo(); |
| 6144 |
auto CallId = MFI->getPreallocatedIdForCallSite( |
6144 |
auto CallId = MFI->getPreallocatedIdForCallSite( |
| 6145 |
cast(Node->getOperand(1))->getValue()); |
6145 |
cast(Node->getOperand(1))->getValue()); |
| 6146 |
SDValue Chain = Node->getOperand(0); |
6146 |
SDValue Chain = Node->getOperand(0); |
| 6147 |
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32); |
6147 |
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32); |
| 6148 |
SDValue ArgIndex = Node->getOperand(2); |
6148 |
SDValue ArgIndex = Node->getOperand(2); |
| 6149 |
SDValue Ops[3]; |
6149 |
SDValue Ops[3]; |
| 6150 |
Ops[0] = CallIdValue; |
6150 |
Ops[0] = CallIdValue; |
| 6151 |
Ops[1] = ArgIndex; |
6151 |
Ops[1] = ArgIndex; |
| 6152 |
Ops[2] = Chain; |
6152 |
Ops[2] = Chain; |
| 6153 |
MachineSDNode *New = CurDAG->getMachineNode( |
6153 |
MachineSDNode *New = CurDAG->getMachineNode( |
| 6154 |
TargetOpcode::PREALLOCATED_ARG, dl, |
6154 |
TargetOpcode::PREALLOCATED_ARG, dl, |
| 6155 |
CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()), |
6155 |
CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()), |
| 6156 |
MVT::Other), |
6156 |
MVT::Other), |
| 6157 |
Ops); |
6157 |
Ops); |
| 6158 |
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer |
6158 |
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer |
| 6159 |
ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain |
6159 |
ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain |
| 6160 |
CurDAG->RemoveDeadNode(Node); |
6160 |
CurDAG->RemoveDeadNode(Node); |
| 6161 |
return; |
6161 |
return; |
| 6162 |
} |
6162 |
} |
| 6163 |
case X86ISD::AESENCWIDE128KL: |
6163 |
case X86ISD::AESENCWIDE128KL: |
| 6164 |
case X86ISD::AESDECWIDE128KL: |
6164 |
case X86ISD::AESDECWIDE128KL: |
| 6165 |
case X86ISD::AESENCWIDE256KL: |
6165 |
case X86ISD::AESENCWIDE256KL: |
| 6166 |
case X86ISD::AESDECWIDE256KL: { |
6166 |
case X86ISD::AESDECWIDE256KL: { |
| 6167 |
if (!Subtarget->hasWIDEKL()) |
6167 |
if (!Subtarget->hasWIDEKL()) |
| 6168 |
break; |
6168 |
break; |
| 6169 |
|
6169 |
|
| 6170 |
unsigned Opcode; |
6170 |
unsigned Opcode; |
| 6171 |
switch (Node->getOpcode()) { |
6171 |
switch (Node->getOpcode()) { |
| 6172 |
default: |
6172 |
default: |
| 6173 |
llvm_unreachable("Unexpected opcode!"); |
6173 |
llvm_unreachable("Unexpected opcode!"); |
| 6174 |
case X86ISD::AESENCWIDE128KL: |
6174 |
case X86ISD::AESENCWIDE128KL: |
| 6175 |
Opcode = X86::AESENCWIDE128KL; |
6175 |
Opcode = X86::AESENCWIDE128KL; |
| 6176 |
break; |
6176 |
break; |
| 6177 |
case X86ISD::AESDECWIDE128KL: |
6177 |
case X86ISD::AESDECWIDE128KL: |
| 6178 |
Opcode = X86::AESDECWIDE128KL; |
6178 |
Opcode = X86::AESDECWIDE128KL; |
| 6179 |
break; |
6179 |
break; |
| 6180 |
case X86ISD::AESENCWIDE256KL: |
6180 |
case X86ISD::AESENCWIDE256KL: |
| 6181 |
Opcode = X86::AESENCWIDE256KL; |
6181 |
Opcode = X86::AESENCWIDE256KL; |
| 6182 |
break; |
6182 |
break; |
| 6183 |
case X86ISD::AESDECWIDE256KL: |
6183 |
case X86ISD::AESDECWIDE256KL: |
| 6184 |
Opcode = X86::AESDECWIDE256KL; |
6184 |
Opcode = X86::AESDECWIDE256KL; |
| 6185 |
break; |
6185 |
break; |
| 6186 |
} |
6186 |
} |
| 6187 |
|
6187 |
|
| 6188 |
SDValue Chain = Node->getOperand(0); |
6188 |
SDValue Chain = Node->getOperand(0); |
| 6189 |
SDValue Addr = Node->getOperand(1); |
6189 |
SDValue Addr = Node->getOperand(1); |
| 6190 |
|
6190 |
|
| 6191 |
SDValue Base, Scale, Index, Disp, Segment; |
6191 |
SDValue Base, Scale, Index, Disp, Segment; |
| 6192 |
if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment)) |
6192 |
if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment)) |
| 6193 |
break; |
6193 |
break; |
| 6194 |
|
6194 |
|
| 6195 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2), |
6195 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2), |
| 6196 |
SDValue()); |
6196 |
SDValue()); |
| 6197 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3), |
6197 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3), |
| 6198 |
Chain.getValue(1)); |
6198 |
Chain.getValue(1)); |
| 6199 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4), |
6199 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4), |
| 6200 |
Chain.getValue(1)); |
6200 |
Chain.getValue(1)); |
| 6201 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5), |
6201 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5), |
| 6202 |
Chain.getValue(1)); |
6202 |
Chain.getValue(1)); |
| 6203 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6), |
6203 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6), |
| 6204 |
Chain.getValue(1)); |
6204 |
Chain.getValue(1)); |
| 6205 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7), |
6205 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7), |
| 6206 |
Chain.getValue(1)); |
6206 |
Chain.getValue(1)); |
| 6207 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8), |
6207 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8), |
| 6208 |
Chain.getValue(1)); |
6208 |
Chain.getValue(1)); |
| 6209 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9), |
6209 |
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9), |
| 6210 |
Chain.getValue(1)); |
6210 |
Chain.getValue(1)); |
| 6211 |
|
6211 |
|
| 6212 |
MachineSDNode *Res = CurDAG->getMachineNode( |
6212 |
MachineSDNode *Res = CurDAG->getMachineNode( |
| 6213 |
Opcode, dl, Node->getVTList(), |
6213 |
Opcode, dl, Node->getVTList(), |
| 6214 |
{Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)}); |
6214 |
{Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)}); |
| 6215 |
CurDAG->setNodeMemRefs(Res, cast(Node)->getMemOperand()); |
6215 |
CurDAG->setNodeMemRefs(Res, cast(Node)->getMemOperand()); |
| 6216 |
ReplaceNode(Node, Res); |
6216 |
ReplaceNode(Node, Res); |
| 6217 |
return; |
6217 |
return; |
| 6218 |
} |
6218 |
} |
| 6219 |
} |
6219 |
} |
| 6220 |
|
6220 |
|
| 6221 |
SelectCode(Node); |
6221 |
SelectCode(Node); |
| 6222 |
} |
6222 |
} |
| 6223 |
|
6223 |
|
| 6224 |
bool X86DAGToDAGISel:: |
6224 |
bool X86DAGToDAGISel:: |
| 6225 |
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, |
6225 |
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, |
| 6226 |
std::vector &OutOps) { |
6226 |
std::vector &OutOps) { |
| 6227 |
SDValue Op0, Op1, Op2, Op3, Op4; |
6227 |
SDValue Op0, Op1, Op2, Op3, Op4; |
| 6228 |
switch (ConstraintID) { |
6228 |
switch (ConstraintID) { |
| 6229 |
default: |
6229 |
default: |
| 6230 |
llvm_unreachable("Unexpected asm memory constraint"); |
6230 |
llvm_unreachable("Unexpected asm memory constraint"); |
| 6231 |
case InlineAsm::Constraint_o: // offsetable ?? |
6231 |
case InlineAsm::Constraint_o: // offsetable ?? |
| 6232 |
case InlineAsm::Constraint_v: // not offsetable ?? |
6232 |
case InlineAsm::Constraint_v: // not offsetable ?? |
| 6233 |
case InlineAsm::Constraint_m: // memory |
6233 |
case InlineAsm::Constraint_m: // memory |
| 6234 |
case InlineAsm::Constraint_X: |
6234 |
case InlineAsm::Constraint_X: |
| 6235 |
case InlineAsm::Constraint_p: // address |
6235 |
case InlineAsm::Constraint_p: // address |
| 6236 |
if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) |
6236 |
if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) |
| 6237 |
return true; |
6237 |
return true; |
| 6238 |
break; |
6238 |
break; |
| 6239 |
} |
6239 |
} |
| 6240 |
|
6240 |
|
| 6241 |
OutOps.push_back(Op0); |
6241 |
OutOps.push_back(Op0); |
| 6242 |
OutOps.push_back(Op1); |
6242 |
OutOps.push_back(Op1); |
| 6243 |
OutOps.push_back(Op2); |
6243 |
OutOps.push_back(Op2); |
| 6244 |
OutOps.push_back(Op3); |
6244 |
OutOps.push_back(Op3); |
| 6245 |
OutOps.push_back(Op4); |
6245 |
OutOps.push_back(Op4); |
| 6246 |
return false; |
6246 |
return false; |
| 6247 |
} |
6247 |
} |
| 6248 |
|
6248 |
|
| 6249 |
/// This pass converts a legalized DAG into a X86-specific DAG, |
6249 |
/// This pass converts a legalized DAG into a X86-specific DAG, |
| 6250 |
/// ready for instruction scheduling. |
6250 |
/// ready for instruction scheduling. |
| 6251 |
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, |
6251 |
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, |
| 6252 |
CodeGenOpt::Level OptLevel) { |
6252 |
CodeGenOpt::Level OptLevel) { |
| 6253 |
return new X86DAGToDAGISel(TM, OptLevel); |
6253 |
return new X86DAGToDAGISel(TM, OptLevel); |
| 6254 |
} |
6254 |
} |
| 6255 |
|
6255 |
|